diff --git a/api/main.py b/api/main.py
index ff1f1ab..45dad7a 100644
--- a/api/main.py
+++ b/api/main.py
@@ -91,7 +91,7 @@ YTDLP_COOKIES_FILE = os.getenv("YTDLP_COOKIES_FILE", "").strip()
YTDLP_COOKIES_FROM_BROWSER = os.getenv("YTDLP_COOKIES_FROM_BROWSER", "").strip()
AUDIO_PRODUCT_BRIEF = os.getenv(
"AUDIO_PRODUCT_BRIEF",
- "SKG 智能按摩产品,主打日常肩颈、腰背、眼部、膝盖或足部放松;广告表达要高级、干净、可信,不做医疗疗效承诺。",
+ "SKG smart massage products for everyday neck-and-shoulder, back, eye, knee, or foot relaxation. Ads should feel premium, clean, trustworthy, and must not make medical efficacy claims.",
).strip()
AUDIO_REWRITE_MODEL = gpt_model_env("AUDIO_REWRITE_MODEL", REWRITE_MODEL)
VOICE_PROVIDER = "azure_openai"
@@ -454,6 +454,7 @@ class CharacterLibraryItem(BaseModel):
folder: str = ""
description: str = ""
prompt_brief: str = ""
+ prompt_brief_zh: str = ""
primary_image: str = ""
images: list[CharacterLibraryImage] = Field(default_factory=list)
@@ -480,6 +481,7 @@ class SubjectTemplateItem(BaseModel):
description: str = ""
note: str = ""
prompt_brief: str = ""
+ prompt_brief_zh: str = ""
source: Literal["database"] = "database"
source_job_id: str = ""
source_frame_idx: int = -1
@@ -534,6 +536,7 @@ class KeyElement(BaseModel):
subject_kind: SubjectKind = "object"
subject_assets: list[SubjectAsset] = Field(default_factory=list)
subject_consensus_brief: str = ""
+ subject_consensus_brief_zh: str = ""
created_at: float = 0.0
@@ -565,6 +568,7 @@ class AudioScript(BaseModel):
source_text: str = ""
source_zh: str = ""
rewritten_text: str = ""
+ rewritten_text_zh: str = ""
speaker_profile: str = ""
rhythm_profile: str = ""
background_audio_profile: str = ""
@@ -2307,7 +2311,7 @@ def _audio_profile_model_sync(wav: Path, segments: list[TranscriptSegment], targ
fallback = _fallback_audio_profile(segments, target_seconds)
if not LLM_API_KEY or not wav.exists():
return fallback
- transcript = _transcript_join(segments, "en") or _transcript_join(segments, "zh") or "No reliable transcript."
+ transcript = _ensure_english(_transcript_join(segments, "en") or _transcript_join(segments, "zh") or "No reliable transcript.")
try:
audio_b64 = base64.b64encode(wav.read_bytes()).decode("ascii")
except Exception:
@@ -2373,12 +2377,15 @@ def _build_audio_intake_sync(job_id: str, wav: Path, segments: list[TranscriptSe
)
-def _rewrite_audio_script_sync(segments: list[TranscriptSegment], target_seconds: float = 12.0) -> tuple[str, str]:
+def _rewrite_audio_script_sync(segments: list[TranscriptSegment], target_seconds: float = 12.0) -> tuple[str, str, str]:
fallback = _fallback_audio_script(segments, target_seconds)
+ try:
+ fallback_zh = _translate_text_sync(fallback, "zh", max_tokens=300) if LLM_API_KEY else ""
+ except Exception:
+ fallback_zh = ""
if not LLM_API_KEY:
- return fallback, "LLM_API_KEY 未配置,使用本地 SKG 模板"
+ return fallback, fallback_zh, "LLM_API_KEY 未配置,使用本地 SKG 模板"
source_text = _transcript_join(segments, "en")
- source_zh = _transcript_join(segments, "zh")
min_words, max_words = _voiceover_target_words(target_seconds)
prompt = (
"You are an English short-video voice-over writer for SKG wellness massagers. "
@@ -2392,10 +2399,9 @@ def _rewrite_audio_script_sync(segments: list[TranscriptSegment], target_seconds
"5. Introduce SKG products directly: smart massage, warmth, rhythm, daily neck/back/eye/knee/foot relaxation.\n"
"6. Keep it easy for TTS: short sentences, spoken phrasing, no hashtags, no stage directions, no quotation marks.\n"
"7. If the source transcript is thin, ignore it and write a general SKG product intro.\n"
- 'Return strict JSON only: {"rewritten_text":"..."}.\n\n'
- f"SKG product context: {AUDIO_PRODUCT_BRIEF}\n\n"
- f"English transcript:\n{source_text or 'None'}\n\n"
- f"Chinese translation for reference:\n{source_zh or 'None'}"
+ 'Return strict JSON only: {"rewritten_text":"English VO","rewritten_text_zh":"Simplified Chinese mirror for team review"}.\n\n'
+ f"SKG product context: {_ensure_english(AUDIO_PRODUCT_BRIEF)}\n\n"
+ f"English transcript:\n{source_text or 'None'}"
)
try:
resp = llm().chat.completions.create(
@@ -2415,9 +2421,12 @@ def _rewrite_audio_script_sync(segments: list[TranscriptSegment], target_seconds
raw = match.group(0) if match else raw
data = json.loads(raw)
text = str(data.get("rewritten_text", "")).strip()
- return (text or fallback), ""
+ text_zh = str(data.get("rewritten_text_zh", "")).strip()
+ if text and not text_zh:
+ text_zh = _translate_text_sync(text, "zh", max_tokens=300)
+ return (text or fallback), (text_zh or fallback_zh), ""
except Exception as e:
- return fallback, f"改写失败,使用本地模板:{e}"
+ return fallback, fallback_zh, f"改写失败,使用本地模板:{e}"
def _choose_azure_voice_id() -> str:
@@ -2521,7 +2530,7 @@ def _build_audio_script_sync(job_id: str, segments: list[TranscriptSegment], tar
source_text = _transcript_join(segments, "en")
source_zh = _transcript_join(segments, "zh")
duration = max(float(target_seconds or 0), _segment_duration(segments), 4.0)
- rewritten, rewrite_error = _rewrite_audio_script_sync(segments, duration)
+ rewritten, rewritten_zh, rewrite_error = _rewrite_audio_script_sync(segments, duration)
selected_voice_id = _choose_tts_voice_id()
speaker_profile, rhythm_profile = _audio_delivery_profile(segments, duration, selected_voice_id)
voice_url = ""
@@ -2539,6 +2548,7 @@ def _build_audio_script_sync(job_id: str, segments: list[TranscriptSegment], tar
source_text=source_text,
source_zh=source_zh,
rewritten_text=rewritten,
+ rewritten_text_zh=rewritten_zh,
speaker_profile=speaker_profile,
rhythm_profile=rhythm_profile,
product_brief=AUDIO_PRODUCT_BRIEF,
@@ -3055,6 +3065,55 @@ class RewriteStoryboardScriptReq(BaseModel):
segments: list[ScriptRewriteSegmentReq] = Field(default_factory=list)
+_TRANSLATION_CACHE: dict[str, str] = {}
+
+
+def _contains_cjk(text: str) -> bool:
+    """Report whether *text* holds at least one code point in U+3400..U+9FFF.
+
+    ``None`` and the empty string are treated as containing no CJK.
+    """
+    return any("\u3400" <= char <= "\u9fff" for char in (text or ""))
+
+
+def _translate_text_sync(text: str, target: Literal["en", "zh"] = "en", *, max_tokens: int = 700) -> str:
+    """Translate *text* into English or Simplified Chinese with one chat call.
+
+    Args:
+        text: Source text; ``None`` is treated as empty and the value is stripped.
+        target: ``"en"`` selects English, any other value Simplified Chinese.
+        max_tokens: Completion token cap forwarded to the chat request.
+
+    Returns:
+        The model output with wrapping quote marks removed. The stripped input
+        is returned unchanged when it is empty, when ``LLM_API_KEY`` is falsy,
+        or when the model produces no usable output.
+
+    Exceptions raised by the chat request are not caught here.
+    """
+    text = (text or "").strip()
+    if not text or not LLM_API_KEY:
+        return text
+    target_label = "English" if target == "en" else "Simplified Chinese"
+    prompt = (
+        f"Translate the following TikTok ad planning text into concise natural {target_label}. "
+        "Preserve concrete product, camera, subject, timing, and structure details. "
+        "Do not add commentary, markdown, quotes, or explanations.\n\n"
+        f"Input:\n{text}"
+    )
+    resp = llm().chat.completions.create(
+        model=TRANSLATE_MODEL,
+        messages=[{"role": "user", "content": prompt}],
+        temperature=0.15,
+        max_tokens=max_tokens,
+    )
+    message = resp.choices[0].message
+    out = (message.content or "").strip()
+    if not out:
+        # Fall back to the last line of `reasoning_content` when `content` is
+        # empty. Strip before the truthiness check: a whitespace-only value
+        # has no lines, and indexing [-1] on it would raise IndexError.
+        rc = (getattr(message, "reasoning_content", "") or "").strip()
+        if rc:
+            out = rc.splitlines()[-1].strip()
+    return re.sub(r'^[\'"「『]+|[\'"」』]+$', "", out).strip() or text
+
+
+def _ensure_english(text: str) -> str:
+    """Return *text* in English, translating only when it contains CJK.
+
+    Text without CJK characters (or empty/``None`` input) is returned stripped
+    and untouched. Translations are memoised in ``_TRANSLATION_CACHE`` under a
+    SHA-256 of the input. If the translation call raises, the error is printed
+    and the stripped original text is returned instead.
+    """
+    text = (text or "").strip()
+    if not text or not _contains_cjk(text):
+        return text
+    key = hashlib.sha256(("en\0" + text).encode("utf-8")).hexdigest()
+    cached = _TRANSLATION_CACHE.get(key)
+    if cached:
+        return cached
+    try:
+        translated = _translate_text_sync(text, "en", max_tokens=max(700, min(3500, len(text) // 2 + 900)))
+    except Exception as e:
+        print(f"[ensure english fallback] {e}", flush=True)
+        return text
+    # _translate_text_sync hands the input back unchanged when it has nothing
+    # usable to return. Caching that would pin the untranslated text for the
+    # life of the process, so only store results that differ from the input.
+    if translated and translated != text:
+        # Keys come from user-supplied prompts, so keep the cache bounded.
+        if len(_TRANSLATION_CACHE) >= 1024:
+            _TRANSLATION_CACHE.clear()
+        _TRANSLATION_CACHE[key] = translated
+    return translated
+
+
@app.post("/translate")
def translate_text(req: TranslateReq) -> dict:
"""单条文本翻译(给生图自定义提取元素 zh→en 用)"""
@@ -3092,22 +3151,26 @@ def translate_text(req: TranslateReq) -> dict:
def _fallback_script_rewrite_item(segment: ScriptRewriteSegmentReq, author_intent: str = "") -> dict:
+ """Build a template-based English voice-over line for one segment, without the rewrite model.
+
+ The template is picked by ``segment.role``; unknown roles use the "bridge" template.
+ Returns ``{"index", "text", "text_zh"}``; ``text_zh`` is "" when ``LLM_API_KEY`` is
+ falsy or the translation call raises.
+ """
source = (segment.source or "").strip()
- intent = (author_intent or "").strip()
+ intent = _ensure_english(author_intent or "")
role = segment.role or ""
templates = {
- "开场钩子": "你有没有发现,低头久了以后,脖子和肩膀会先替你喊累。",
- "痛点推进": "刷手机、坐电脑、赶通勤叠在一起,肩颈很容易一直绷着放不下来。",
- "利益证明": "SKG 这种挂脖按摩仪,重点就是贴住肩颈位置,把热敷感和揉按感带到真正紧的地方。",
- "方案过渡": "这一段可以直接拍拿起、戴上、贴合,让产品自然进入日常放松场景。",
- "转化收口": "如果你也想把肩颈放松变成每天的小习惯,可以从这台 SKG 开始。",
- "节奏承接": "顺着原片节奏,把这一句落到一个具体的肩颈使用场景里。",
+ "hook": "Have you noticed that after hours of looking down, your neck and shoulders complain before you do?",
+ "pain": "Phone scrolling, desk work, and commuting can keep your neck and shoulders tight all day.",
+ "proof": "An SKG wearable massager sits around the neck and shoulders, bringing warm, rhythmic comfort to the spots that feel tense.",
+ "solution": "This beat can simply show pick up, wear, fit, and relax, so the product enters a normal daily routine.",
+ "cta": "If you want neck-and-shoulder relaxation to become a daily habit, start with this SKG massager.",
+ "bridge": "Follow the source rhythm, but land this line in one specific neck-and-shoulder use moment.",
}
- rewritten = templates.get(role, templates["节奏承接"])
- if source and role not in {"开场钩子", "转化收口"}:
- rewritten = f"{rewritten} 原片这一句的节奏可以保留,但内容换成 SKG 的佩戴和放松体验。"
+ rewritten = templates.get(role, templates["bridge"])
+ if source and role not in {"hook", "cta"}:
+ rewritten = f"{rewritten} Keep the source sentence rhythm, but replace the content with SKG wearing and relaxation experience."
if intent:
- rewritten = f"{rewritten} 语气按作者想法处理:{intent[:44]}。"
- return {"index": segment.index, "text": rewritten[:220]}
+ rewritten = f"{rewritten} Adjust the tone based on the creator note: {intent[:90]}."
+ # Truncate before translating so the zh mirror describes exactly the text that is returned.
+ rewritten = rewritten[:260]
+ try:
+ zh = _translate_text_sync(rewritten, "zh", max_tokens=260) if LLM_API_KEY else ""
+ except Exception:
+ zh = ""
+ return {"index": segment.index, "text": rewritten, "text_zh": zh}
def _parse_script_rewrite_items(raw: str, requested: list[ScriptRewriteSegmentReq], author_intent: str = "") -> list[dict]:
@@ -3123,7 +3186,7 @@ def _parse_script_rewrite_items(raw: str, requested: list[ScriptRewriteSegmentRe
raw_items = data.get("items") if isinstance(data, dict) else data
if not isinstance(raw_items, list):
raw_items = []
- by_index: dict[int, str] = {}
+ by_index: dict[int, tuple[str, str]] = {}
for item in raw_items:
if not isinstance(item, dict):
continue
@@ -3132,19 +3195,27 @@ def _parse_script_rewrite_items(raw: str, requested: list[ScriptRewriteSegmentRe
except Exception:
continue
value = str(item.get("text") or item.get("rewritten_text") or "").strip()
+ value_zh = str(item.get("text_zh") or item.get("rewritten_text_zh") or "").strip()
if value:
- by_index[idx] = re.sub(r"\s+", " ", value).strip()[:260]
- return [
- {"index": segment.index, "text": by_index.get(segment.index) or _fallback_script_rewrite_item(segment, author_intent)["text"]}
- for segment in requested
- ]
+ by_index[idx] = (re.sub(r"\s+", " ", value).strip()[:260], re.sub(r"\s+", " ", value_zh).strip()[:260])
+ items = []
+ for segment in requested:
+ fallback = _fallback_script_rewrite_item(segment, author_intent)
+ text, text_zh = by_index.get(segment.index, ("", ""))
+ if text and not text_zh:
+ try:
+ text_zh = _translate_text_sync(text, "zh", max_tokens=260) if LLM_API_KEY else ""
+ except Exception:
+ text_zh = ""
+ items.append({"index": segment.index, "text": text or fallback["text"], "text_zh": text_zh or fallback.get("text_zh", "")})
+ return items
def _rewrite_storyboard_script_sync(req: RewriteStoryboardScriptReq) -> list[dict]:
segments = [segment for segment in req.segments if (segment.source or segment.current_text).strip()]
if not segments:
return []
- author_intent = (req.author_intent or "").strip()
+ author_intent = _ensure_english(req.author_intent or "")
if not LLM_API_KEY:
return [_fallback_script_rewrite_item(segment, author_intent) for segment in segments]
payload = [
@@ -3152,26 +3223,27 @@ def _rewrite_storyboard_script_sync(req: RewriteStoryboardScriptReq) -> list[dic
"index": segment.index,
"time": f"{segment.start:.1f}-{segment.end:.1f}s",
"role": segment.role,
- "source_reference": segment.source,
- "current_voiceover": segment.current_text,
+ "source_reference": _ensure_english(segment.source),
+ "current_voiceover": _ensure_english(segment.current_text),
}
for segment in segments
]
prompt = (
- "你是信息流广告脚本文案改写师。任务:基于原参考文案的节奏和信息结构,把每段改写成 SKG 挂脖肩颈按摩仪的新口播文案。\n"
- "硬规则:\n"
- "1. 输出中文短视频口播,不要英文,不要舞台说明,不要引号。\n"
- "2. 不逐字翻译原文,不保留原品牌、价格、优惠码、平台话术;只参考节奏、钩子、痛点、转化结构。\n"
- "3. 产品固定为套在脖子上的 U 形肩颈按摩仪,表达肩颈紧绷、久坐低头、热敷感、揉按感、佩戴放松和日常使用场景。\n"
- "4. 避免医疗疗效、治疗、治愈、止痛等强功效承诺。\n"
- "5. 每段尽量短,适配该段时间;保持自然创作者口吻。\n"
- "6. mode=all 时,整片要前后连贯;mode=segment 时,只改给定段落但仍要贴合上下文风格。\n"
- f"作者想法:{author_intent or '没有额外想法,按原片节奏改成自然卖点口播。'}\n"
- f"改写模式:{req.mode}\n"
- f"SKG 产品背景:{AUDIO_PRODUCT_BRIEF}\n\n"
- "输入段落 JSON:\n"
+ "You are an information-feed ad voice-over rewrite specialist. Rewrite each segment into a new ENGLISH SKG neck-and-shoulder massager voice-over line while preserving the source rhythm and information structure.\n"
+ "Hard rules:\n"
+ "1. The main text field must be English short-video VO. No stage directions, no quotes.\n"
+ "2. Do not translate word-for-word. Do not keep the original brand, price, discount code, platform CTA, or exact claims; only reuse rhythm, hook, pain-point, proof, and conversion structure.\n"
+ "3. The product is a U-shaped neck-and-shoulder wearable massager worn around the neck. Express neck/shoulder tension, desk posture, looking down, warmth, kneading-like comfort, wearing, relaxation, and daily use.\n"
+ "4. Avoid medical treatment, cure, pain elimination, clinical, or disease claims.\n"
+ "5. Keep each segment short enough for its time range and natural for a creator voice.\n"
+ "6. If mode=all, make the whole piece coherent; if mode=segment, rewrite only the given segment while matching the broader style.\n"
+ "7. Also return a Simplified Chinese mirror for team review in text_zh; it is not for model prompts.\n"
+ f"Creator note: {author_intent or 'No extra note; follow the source pacing and turn it into natural SKG product VO.'}\n"
+ f"Rewrite mode: {req.mode}\n"
+ f"SKG product context: {_ensure_english(AUDIO_PRODUCT_BRIEF)}\n\n"
+ "Input segments JSON:\n"
+ json.dumps(payload, ensure_ascii=False)
- + '\n\n只输出严格 JSON:{"items":[{"index":0,"text":"改写后的中文口播"}]}'
+ + '\n\nReturn strict JSON only: {"items":[{"index":0,"text":"rewritten English VO","text_zh":"中文镜像"}]}'
)
models = []
for model in [AUDIO_REWRITE_MODEL, ASR_FALLBACK_MODEL, TRANSLATE_MODEL]:
@@ -3182,7 +3254,7 @@ def _rewrite_storyboard_script_sync(req: RewriteStoryboardScriptReq) -> list[dic
resp = llm().chat.completions.create(
model=model,
messages=[
- {"role": "system", "content": "只返回合法 JSON,不要 markdown,不要解释。"},
+ {"role": "system", "content": "Return valid JSON only. No markdown. No explanation."},
{"role": "user", "content": prompt},
],
response_format={"type": "json_object"},
@@ -3950,6 +4022,7 @@ class UpdateElementReq(BaseModel):
name_en: str | None = None
position: str | None = None
subject_consensus_brief: str | None = None
+ subject_consensus_brief_zh: str | None = None
class GenerateSceneAssetReq(BaseModel):
@@ -3998,8 +4071,8 @@ class GenerateSubjectAssetsReq(BaseModel):
def _subject_profile_prompt_clause(profile: SubjectProfilePreference | None) -> str:
if not profile:
return ""
- prompt_summary = (profile.prompt_summary or "").strip()
- resolved_summary = (profile.resolved_summary or "").strip()
+ prompt_summary = _ensure_english(profile.prompt_summary or "")
+ resolved_summary = _ensure_english(profile.resolved_summary or "")
if prompt_summary:
body = prompt_summary[:1400]
else:
@@ -4013,7 +4086,7 @@ def _subject_profile_prompt_clause(profile: SubjectProfilePreference | None) ->
("hair style", profile.hair),
("commercial mood", profile.mood),
]
- body = "; ".join(f"{name}: {value.strip()}" for name, value in parts if value and value.strip())[:1400]
+ body = "; ".join(f"{name}: {_ensure_english(value.strip())}" for name, value in parts if value and value.strip())[:1400]
if not body and not resolved_summary:
return ""
mode = "random-composed" if profile.mode == "random" else "manually selected"
@@ -4125,7 +4198,9 @@ def update_element(job_id: str, idx: int, element_id: str, req: UpdateElementReq
if req.position is not None:
e.position = req.position.strip()
if req.subject_consensus_brief is not None:
- e.subject_consensus_brief = req.subject_consensus_brief.strip()[:2200]
+ e.subject_consensus_brief = _ensure_english(req.subject_consensus_brief.strip())[:2200]
+ if req.subject_consensus_brief_zh is not None:
+ e.subject_consensus_brief_zh = req.subject_consensus_brief_zh.strip()[:2200]
new_frames.append(f)
if not found:
raise HTTPException(404, "element not found")
@@ -4208,7 +4283,7 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
if confirmed_subjects
else "Remove the main foreground subject from the frame if present. "
)
- subject_brief = req.subject_brief.strip()
+ subject_brief = _ensure_english(req.subject_brief.strip())
subject_brief_clause = (
f"Subject identity (text only, no image reference): {subject_brief[:1800]}. "
"Maintain this identity across this and other endpoint frames in the same storyboard. "
@@ -4237,7 +4312,7 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
"warm_lifestyle": "Use a warm lifestyle style: realistic lived-in details, soft natural light, approachable atmosphere.",
"cinematic": "Use a cinematic style: dramatic but natural lighting, richer depth, filmic contrast, not fantasy.",
}[req.scene_style]
- user_prompt = req.prompt.strip()
+ user_prompt = _ensure_english(req.prompt.strip())
user_prompt_clause = (
"User scene direction: " + user_prompt[:1200] + " "
if user_prompt
@@ -4483,6 +4558,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
brief = template.prompt_brief.strip() or template.note.strip() or template.description.strip()
if similar_mode and not brief:
brief = _describe_subject_template_from_images(template.name, template.subject_style, template_paths, template.note)
+ brief = _ensure_english(brief)
selected_template_brief = brief.strip()
template_brief_clause = (
f"Reference character brief from saved database template '{template.name}': {brief}. "
@@ -4496,6 +4572,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
character_label = character.name
character_reference_paths.extend(character_library_file(image.filename) for image in character.images[:7])
brief = character.prompt_brief.strip() or character.description.strip()
+ brief = _ensure_english(brief)
selected_template_brief = brief.strip()
template_brief_clause = (
f"Reference character brief from built-in creative character '{character.name}': {brief}. "
@@ -4558,7 +4635,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
if req.reconstruction_mode == "similar"
else "Preserve identity, proportions, silhouette, material, colors, styling, and distinctive details across all generated views. "
)
- prompt_extra = req.prompt.strip()
+ prompt_extra = _ensure_english(req.prompt.strip())
prompt_extra_clause = f"User direction: {prompt_extra[:1200]} " if prompt_extra else ""
subject_profile_clause = _subject_profile_prompt_clause(req.subject_profile)
identity_lock_clause = (
@@ -4709,7 +4786,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
]
fallback_brief = " ".join(part.strip() for part in fallback_parts if part and part.strip())[:1800]
if selected_template_brief:
- e.subject_consensus_brief = selected_template_brief[:1800]
+ e.subject_consensus_brief = _ensure_english(selected_template_brief)[:1800]
else:
asset_paths = [
job_dir(job_id) / "assets" / f"{asset.id}.jpg"
@@ -4722,9 +4799,14 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
asset_paths,
fallback_brief,
)
- e.subject_consensus_brief = brief or current_brief or fallback_brief or (
+ e.subject_consensus_brief = _ensure_english(brief or current_brief or fallback_brief or (
"Generated SKG ad subject; identity brief unavailable. Keep one consistent commercial subject with clear neck and shoulder placement area."
- )
+ ))[:1800]
+ if e.subject_consensus_brief and not e.subject_consensus_brief_zh:
+ try:
+ e.subject_consensus_brief_zh = _translate_text_sync(e.subject_consensus_brief, "zh", max_tokens=500)[:1800]
+ except Exception:
+ e.subject_consensus_brief_zh = ""
new_frames.append(f)
if generation_errors:
msg = f"主体资产包部分生成完成 · {el.name_zh} · {len(generated)} 张,失败 {len(generation_errors)} 张"
@@ -5296,18 +5378,23 @@ def save_subject_template(job_id: str, req: SaveSubjectTemplateReq) -> SubjectTe
raise HTTPException(404, "subject asset files missing")
primary = next((image.id for image in images if image.view == "front"), images[0].id)
- prompt_brief = _describe_subject_template_from_images(
+ prompt_brief = _ensure_english(_describe_subject_template_from_images(
name,
req.subject_style,
saved_image_paths,
req.note.strip(),
- ) or req.note.strip()
+ ) or req.note.strip())
+ try:
+ prompt_brief_zh = _translate_text_sync(prompt_brief, "zh", max_tokens=500) if prompt_brief else ""
+ except Exception:
+ prompt_brief_zh = ""
item = SubjectTemplateItem(
id=template_id,
name=name,
description=req.note.strip(),
note=req.note.strip(),
prompt_brief=prompt_brief,
+ prompt_brief_zh=prompt_brief_zh,
source_job_id=job_id,
source_frame_idx=frame.index,
source_element_id=element.id,
diff --git a/docs/source-analysis.html b/docs/source-analysis.html
index 8fb1e54..c7456af 100644
--- a/docs/source-analysis.html
+++ b/docs/source-analysis.html
@@ -663,7 +663,7 @@ api/main.py
@@ -737,17 +737,19 @@ api/main.py
cutout_id,
subject_kind: object | living,
subject_assets: SubjectAsset[],
- subject_consensus_brief
+ subject_consensus_brief,
+ subject_consensus_brief_zh
}
AudioScript
-
第一步音频解析的结构化产物。pipeline_transcribe 提取 audio.wav 后先保存原始转写、中文翻译、讲话人画像、口播节奏和背景音乐/环境声/音效分析。rewritten_text、voice_url 等字段仍保留给后续新配音阶段,当前第一步不默认写入。
+
第一步音频解析的结构化产物。pipeline_transcribe 提取 audio.wav 后先保存原始英文转写、中文翻译、讲话人画像、口播节奏和背景音乐/环境声/音效分析。rewritten_text 是英文新口播,rewritten_text_zh 只作为团队审稿镜像;voice_url 等字段仍保留给后续新配音阶段。
AudioScript {
status: idle | rewriting | completed | failed,
source_text,
source_zh,
rewritten_text,
+ rewritten_text_zh,
speaker_profile,
rhythm_profile,
background_audio_profile,
@@ -777,10 +779,11 @@ SubjectAsset {
width, height, size,
source_frame_indices[]
}
-
SubjectTemplateItem 保存用户确认过的主体视图包。prompt_brief 是后端从模板图反推的文字特征,后续相似生成优先读取它,而不是再次把模板图作为强参考图传给 image-edit。
+
SubjectTemplateItem 保存用户确认过的主体视图包。prompt_brief 是后端从模板图反推的英文文字特征,后续相似生成优先读取它,而不是再次把模板图作为强参考图传给 image-edit;prompt_brief_zh 仅用于模板库卡片和团队阅读。
SubjectTemplateItem {
id, name, description, note,
prompt_brief,
+ prompt_brief_zh,
subject_style: transparent_human | source_actor,
primary_image,
images: SubjectTemplateImage[]
@@ -907,7 +910,7 @@ ProductRefStateItem {
| 删除输入视频 | DELETE /jobs/{id} | deleteJob | 从任务队列、URL 和磁盘 jobs/<id> 目录移除整个 job,包括源视频、关键帧、元素提取图和生成视频。 |
| 解析视频 | POST /jobs/{id}/analyze?frames=&target=&mode=&quality= | analyzeJob | 抽参考帧能力。当前开始流程会在视频下载完成后自动调用一次,默认 frames=12、target=motion、quality=accurate、mode=replace,形成全局动作/节奏参考帧池;原版视频旁的“抽参考 12 帧”也会用同一参数显式重跑。target 仍支持透明骨架人、综合、清晰主体、转场变化、表情瞬间、动作峰值。 |
| 音频文案轨 | POST /jobs/{id}/transcribe | triggerTranscribe | 若尚未拆轨,先从 source.mp4 提取 audio.wav 并回填 source_audio_url;随后用 ASR 提取原始文案,翻译成中文,写入 audio_script.source_text、source_zh 和逐句 transcript。远端 ASR_MODEL 失败后先走本机 LOCAL_ASR_BIN/LOCAL_ASR_MODEL(默认 mlx_whisper),再尝试 ASR_FALLBACK_MODEL。后端会拒绝重复文本、逐秒假字幕或覆盖率过低的结果,不再把不可听的多模态输出写进时间轴。中文翻译由 TRANSLATE_MODEL 按 ASR 段落补齐,失败时保留原文时间轴且中文可为空。再用 ASR_FALLBACK_MODEL 读取 audio.wav 和已有转写时间轴,多模态音频分析讲话人、语速节奏、停顿、背景音乐/环境声/音效,写入 speaker_profile、rhythm_profile、background_audio_profile;若模型分析失败,则用转写段落、时长和语速做本地估算兜底。当前第一步不默认生成 SKG 新口播和 Azure OpenAI 配音。 |
- | 分镜脚本改写 | POST /jobs/{id}/script/rewrite | rewriteStoryboardScript | 根据原参考文案、当前新口播、分镜角色、时间段和作者想法改写中文口播。mode=segment 只改一段;mode=all 一次改完整片,要求整片前后连贯。后端按 AUDIO_REWRITE_MODEL、ASR_FALLBACK_MODEL、TRANSLATE_MODEL 依次尝试,全部失败时用本地模板保留可编辑文案。接口只返回 items[index,text],前端暂存在当前页面状态里,点击保存规划后写入 StoryboardScene.action。 |
+ | 分镜脚本改写 | POST /jobs/{id}/script/rewrite | rewriteStoryboardScript | 根据原英文参考文案、当前英文新口播、英文 role enum、时间段和作者想法改写英文口播;作者想法若含中文,后端会先经 _ensure_english 兜底翻译。mode=segment 只改一段;mode=all 一次改完整片,要求整片前后连贯。后端按 AUDIO_REWRITE_MODEL、ASR_FALLBACK_MODEL、TRANSLATE_MODEL 依次尝试,全部失败时用英文本地模板保留可编辑文案。接口返回 items[index,text,text_zh],其中 text 是写入模型链路的英文主值,text_zh 只供团队审稿镜像显示;点击保存规划后写入 StoryboardScene.action。 |
| 原始音频文件 | GET /jobs/{id}/audio.wav | sourceAudioUrl | 返回拆轨得到的 wav;当前主界面不再渲染底部吸附音频条,右侧复刻工作表会读取该文件生成参考图式横向响度波形,并和原视频、逐句时间轴联动;波形标题栏显示当前播放秒数、总时长和鼠标指针停点秒数。 |
| 改写配音文件 | GET /jobs/{id}/audio-script.mp3 | apiAssetUrl(job.audio_script.voice_url) | 后续新配音阶段保留的 TTS 产物;服务端固定走 VOICE_PROVIDER=azure_openai,通过 AZURE_OPENAI_BASE_URL 的 OpenAI 协议生成 mp3,并按 AZURE_TTS_PATHS 依次尝试 /audio/speech、/v1/audio/speech 等路径。当前第一步不默认生成该文件。 |
| 手动加帧 | POST /jobs/{id}/frames?t= | addManualFrame | 按视频时间戳抽一帧,index 递增但 frames 按 timestamp 排序。当前主界面会把原版视频播放器的播放秒数传给 AudioIntakePanel 标题栏右侧的“当前点抽帧”。 |
@@ -1035,6 +1038,19 @@ ProductRefStateItem {
变更记录
这个记录不是 git log 的替代品。它记录“产品理解发生了什么变化、影响了哪些源码、你以后描述需求时该怎么说”。后续每次改功能都要补一条。
+
+
+ 2026-05-18 · 模型 prompt 语言策略切到英文主值
+ Prompt
+ UI
+ API
+
+
+
问题:最终产物是英文 TikTok 二创广告,但前端默认分镜规划、首尾帧 prompt 和用户可编辑规划字段里混入中文,导致 gpt-image-2、脚本改写和后续视频模型收到中英混杂指令。
+
改动:AudioStoryboardRow.role 改为 hook/pain/proof/solution/cta/bridge 英文枚举,UI 仍显示中文角色标签。buildAudioStoryboardRows、buildVisualPlan、buildFirstFramePlan、buildLastFramePlan、buildSubjectDescription、buildEndpointFramePrompt 和 buildStoryboardSceneFromAudioRow 的模型主字段改为英文,并新增 *Zh 镜像用于团队阅读。首尾帧提交前前端会对含中文 prompt 调 translateText 兜底,后端新增 _ensure_english 并挂到 generate_scene_asset、generate_subject_assets、脚本改写和音频分析入口。
+
影响:发给 LLM / 生图 / 视频模型的主 prompt 默认全英文;中文只作为团队审稿镜像、UI 标签和 toast。AudioScript 新增 rewritten_text_zh,KeyElement 新增 subject_consensus_brief_zh,SubjectTemplateItem 新增 prompt_brief_zh。POST /jobs/{id}/script/rewrite 返回 text 英文主值和 text_zh 中文镜像。
+
+
2026-05-18 · 首尾帧改为主体 brief + 产品少量硬参考
diff --git a/web/components/ad-recreation-board.tsx b/web/components/ad-recreation-board.tsx
index ce706bb..0b000cd 100644
--- a/web/components/ad-recreation-board.tsx
+++ b/web/components/ad-recreation-board.tsx
@@ -54,6 +54,7 @@ import {
updateElement,
updateStoryboard,
uploadStoryboardAsset,
+ translateText,
videoUrl,
} from "@/lib/api"
import { type NodeData } from "@/components/nodes"
@@ -83,6 +84,7 @@ const VIDEO_MODELS = [
type VideoModel = (typeof VIDEO_MODELS)[number]["value"]
type BoardThemeMode = "dark" | "light"
+type AudioStoryboardRole = "hook" | "pain" | "proof" | "solution" | "cta" | "bridge"
const BOARD_THEME_STORAGE_KEY = "skg-board-theme"
@@ -103,19 +105,28 @@ type AudioStoryboardRow = {
start: number
end: number
source: string
- role: string
+ sourceZh: string
+ role: AudioStoryboardRole
visualMode: StoryboardVisualMode
needsProduct: boolean
needsSubject: boolean
subjectDescription: string
+ subjectDescriptionZh: string
skgCopy: string
+ skgCopyZh: string
visualPlan: string
+ visualPlanZh: string
firstFramePlan: string
+ firstFramePlanZh: string
lastFramePlan: string
+ lastFramePlanZh: string
referencePlan: string
keyElements: string
+ keyElementsZh: string
productIntegration: string
+ productIntegrationZh: string
productPlacement: string
+ productPlacementZh: string
}
type ProductRefItem = ProductRefStateItem
@@ -136,7 +147,7 @@ type ResolvedSubjectProfile = {
payload: SubjectProfilePreference
}
type StoryboardVisualMode = NonNullable
-type RowPlanPatch = Partial>
+type RowPlanPatch = Partial>
type WorkflowStepId = "input" | "source" | "audio" | "visual" | "subject" | "product" | "script" | "scene" | "video"
type WorkflowStepStatus = "blocked" | "pending" | "running" | "ready" | "paused"
type WorkflowStep = {
@@ -324,6 +335,52 @@ const PRODUCT_USE_TAG_LABELS: Record = {
material_texture: "材质",
}
+// Simplified Chinese display label for each AudioStoryboardRole value.
+const ROLE_LABELS_ZH: Record<AudioStoryboardRole, string> = {
+  hook: "开场钩子",
+  pain: "痛点推进",
+  proof: "利益证明",
+  solution: "方案过渡",
+  cta: "转化收口",
+  bridge: "节奏承接",
+}
+
+// English wording for each AudioStoryboardRole value.
+const ROLE_LABELS_EN: Record<AudioStoryboardRole, string> = {
+  hook: "hook",
+  pain: "pain build",
+  proof: "benefit proof",
+  solution: "solution transition",
+  cta: "conversion close",
+  bridge: "rhythm bridge",
+}
+
+const PRODUCT_VIEW_PROMPT_LABELS: Record = {
+ front: "front / outer shell",
+ left_45: "wearer's left 45-degree view",
+ right_45: "wearer's right 45-degree view",
+ side_thickness: "side thickness view",
+ inner_contacts: "inner neck-contact pads",
+ back_bottom: "back / bottom structure",
+}
+
+const PRODUCT_BACKGROUND_PROMPT_LABELS: Record = {
+ white: "white background",
+ black: "black background",
+ simple: "simple solid background",
+ complex: "complex background",
+ unknown: "unknown background",
+}
+
+const PRODUCT_USE_TAG_PROMPT_LABELS: Record = {
+ hero_packshot: "hero packshot",
+ wearing_scale: "wearing scale",
+ inner_contact: "inner contact pads",
+ side_thickness: "side thickness",
+ asymmetry: "left-right asymmetry",
+ button_detail: "button detail",
+ back_bottom: "back/bottom structure",
+ material_texture: "material texture",
+}
+
const controlClass =
"h-10 rounded-md border border-white/10 bg-black/55 px-3 text-[12px] text-white outline-none transition focus:border-cyan-300/60 disabled:cursor-not-allowed disabled:opacity-40"
@@ -352,6 +409,20 @@ function shortId(id?: string | null) {
return id ? id.slice(0, 8) : "-"
}
+/** True when `text` has at least one character in the U+3400..U+9FFF range. */
+function containsCjk(text: string) {
+  const firstCjkIndex = text.search(/[\u3400-\u9fff]/)
+  return firstCjkIndex !== -1
+}
+
+/**
+ * Resolve to the trimmed input, translated to English via `translateText`
+ * when it contains CJK characters. If the translation call throws, the
+ * trimmed input is returned as-is.
+ */
+async function ensureEnglishForModel(text: string) {
+  const candidate = text.trim()
+  if (candidate === "" || !containsCjk(candidate)) {
+    return candidate
+  }
+  let result = candidate
+  try {
+    result = await translateText(candidate, "en")
+  } catch {
+    // Best effort: keep the untranslated text when translation fails.
+  }
+  return result
+}
+
function subjectProfileOption(category: SubjectProfileCategory, value: string) {
return category.options.find((option) => option.value === value) ?? category.options[0]
}
@@ -373,6 +444,16 @@ function resolveSubjectProfile(
const values = { ...DEFAULT_SUBJECT_PROFILE_DRAFT }
const labelParts: string[] = []
const promptParts: string[] = []
+ const promptLabelByKey: Record = {
+ gender: "gender presentation",
+ age: "age range",
+ wardrobe: "wardrobe style",
+ region_ethnicity: "regional or ethnic appearance cues",
+ skin_tone: "skin tone",
+ body: "body proportion",
+ hair: "hair style",
+ mood: "commercial mood",
+ }
for (const category of SUBJECT_PROFILE_CATEGORIES) {
const rawValue = draft[category.key] || "random"
let option = subjectProfileOption(category, rawValue)
@@ -382,7 +463,7 @@ function resolveSubjectProfile(
}
values[category.key] = option.value
labelParts.push(`${category.label}:${option.label}`)
- promptParts.push(`${category.label}: ${option.prompt}`)
+ promptParts.push(`${promptLabelByKey[category.key]}: ${option.prompt}`)
}
const summary = labelParts.join(" / ")
const promptSummary = promptParts.join("; ")
@@ -393,14 +474,14 @@ function resolveSubjectProfile(
promptSummary,
payload: {
mode,
- gender: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[0], values.gender).label,
- age: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[1], values.age).label,
- wardrobe: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[2], values.wardrobe).label,
- region_ethnicity: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[3], values.region_ethnicity).label,
- skin_tone: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[4], values.skin_tone).label,
- body: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[5], values.body).label,
- hair: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[6], values.hair).label,
- mood: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[7], values.mood).label,
+ gender: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[0], values.gender).prompt,
+ age: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[1], values.age).prompt,
+ wardrobe: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[2], values.wardrobe).prompt,
+ region_ethnicity: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[3], values.region_ethnicity).prompt,
+ skin_tone: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[4], values.skin_tone).prompt,
+ body: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[5], values.body).prompt,
+ hair: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[6], values.hair).prompt,
+ mood: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[7], values.mood).prompt,
resolved_summary: summary,
prompt_summary: promptSummary,
},
@@ -828,161 +909,254 @@ function buildFallbackScene(job: Job, frame: KeyFrame, order: number): Storyboar
const duration = Math.max(3.5, Math.min(7.5, Math.max(job.duration || 0, frames.length * 5) / Math.max(frames.length, 1)))
const audio = job.audio_script?.rewritten_text?.trim()
|| job.transcript?.slice(0, 4).map((item) => item.en || item.zh).filter(Boolean).join(" ")
- || "按原音频说话节奏改写为 SKG 产品介绍。"
+ || "Rewrite the original audio pacing into a new SKG product introduction."
const objects = frame.description?.objects?.slice(0, 5).map((item) => item.name).filter(Boolean).join("、")
return {
duration: Number(duration.toFixed(1)),
first_image: null,
last_image: null,
- subject: objects ? `关键元素候选:${objects}` : "保留原视频最重要的主体动作和构图关系。",
- scene: `${frame.description?.scene || `按第 ${order + 1} 段音频规划 SKG 信息流广告分镜。`}\n音频节奏依据:${audio.slice(0, 220)}`,
- product: "把原素材里的产品/痛点转成 SKG 颈部/肩颈按摩仪表达,默认使用 SKG 四张产品角度图做产品真源。",
+ subject: objects ? `Key element candidates: ${objects}` : "Keep the source video's most important subject motion and composition relationship.",
+ scene: `${frame.description?.scene || `Plan SKG information-feed ad scene ${order + 1} from the audio segment.`}\nAudio pacing reference: ${audio.slice(0, 220)}`,
+ product: "Convert the source product or pain-point context into SKG neck-and-shoulder massager expression. Use the uploaded SKG product angles as product truth.",
action: frame.description?.style
- ? `沿用原画面的讲话节奏、动作节点和 ${frame.description.style},突出使用前紧绷、使用后放松。`
- : "沿用原视频的讲话节奏和动作节点,突出使用前紧绷、使用后放松。",
+ ? `Keep the source speaking rhythm, action beats, and ${frame.description.style}; show tension before use and relaxed comfort after use.`
+ : "Keep the source speaking rhythm and action beats; show tension before use and relaxed comfort after use.",
reference_ids: [],
}
}
-function classifyAudioRole(text: string, index: number, total: number) {
+function classifyAudioRole(text: string, index: number, total: number): AudioStoryboardRole {
const lower = text.toLowerCase()
- if (index === 0) return "开场钩子"
- if (index >= total - 2 || /discount|code|shipping|link|limited|sold out|grab|recommend|tiktok/.test(lower)) return "转化收口"
- if (/can't|dont|don't|if |when |tired|stress|pain|crave|bloated|puffy|ready/.test(lower)) return "痛点推进"
- if (/help|can |reduce|improve|relax|lower|stabilize|clear|less/.test(lower)) return "利益证明"
- if (/use|try|apple|product|bottle|one month/.test(lower)) return "方案过渡"
- return "节奏承接"
+ if (index === 0) return "hook"
+ if (index >= total - 2 || /discount|code|shipping|link|limited|sold out|grab|recommend|tiktok/.test(lower)) return "cta"
+ if (/can't|dont|don't|if |when |tired|stress|pain|crave|bloated|puffy|ready/.test(lower)) return "pain"
+ if (/help|can |reduce|improve|relax|lower|stabilize|clear|less/.test(lower)) return "proof"
+ if (/use|try|apple|product|bottle|one month/.test(lower)) return "solution"
+ return "bridge"
}
-function buildSkgCopy(role: string, index: number) {
- const variants: Record = {
- "开场钩子": [
+function buildSkgCopy(role: AudioStoryboardRole, index: number) {
+ const variants: Record = {
+ hook: [
+ "If you spend hours looking down at your phone or working at a desk, your neck and shoulders may already be carrying that tension.",
+ "A few hours on screens can make your neck and shoulders feel tired faster than you expect.",
+ ],
+ pain: [
+ "That tight neck, heavy shoulder feeling, and uncomfortable head lift are signs you should not wait to deal with it.",
+ "Commuting, desk work, parenting, and phone scrolling can keep your neck and shoulders tense all day.",
+ ],
+ proof: [
+ "The SKG neck-and-shoulder massager sits around the back of your neck and shoulders, bringing warmth and kneading-like comfort right where you feel tight.",
+ "Wear it hands-free between work, at home, or before bed to settle into a calmer relaxation rhythm.",
+ ],
+ solution: [
+ "This beat turns the source explanation into a clear SKG routine: pick it up, wear it, adjust the fit, and relax.",
+ "Let the product enter naturally, and show the change from neck tension to a more relaxed state.",
+ ],
+ cta: [
+ "If you want neck-and-shoulder relaxation to become part of your daily routine, this SKG massager is an easy place to start.",
+ "Close with a clear product detail and a relaxed expression so viewers know exactly what to try next.",
+ ],
+ bridge: [
+ "Keep the source video's short, fast rhythm, but anchor each line in a specific neck-and-shoulder moment or product action.",
+ "Use this line as a bridge from the pain point into the SKG routine without slowing the pace.",
+ ],
+ }
+ const list = variants[role] ?? variants.bridge
+ return list[index % list.length]
+}
+
+function buildSkgCopyZh(role: AudioStoryboardRole, index: number) {
+ const variants: Record = {
+ hook: [
"如果你也经常低头刷手机、久坐办公,肩颈紧绷可能已经在悄悄影响状态。",
"每天盯屏几个小时,脖子和肩膀的疲惫会比你想得更早出现。",
],
- "痛点推进": [
+ pain: [
"脖子发紧、肩膀沉、抬头不舒服,不一定要等到很难受才处理。",
"通勤、办公、带娃、刷手机叠在一起,肩颈很容易一直处在紧绷状态。",
],
- "利益证明": [
+ proof: [
"SKG 颈部按摩仪贴合后颈和肩颈两侧,把热敷感和揉按感带到真正紧的位置。",
"戴上后不用占手,工作间隙、居家放松、睡前都能快速进入舒缓节奏。",
],
- "方案过渡": [
+ solution: [
"这一镜把原片的讲解节奏换成 SKG 使用步骤:拿起、佩戴、贴合、放松。",
"让产品自然进入画面,重点不是硬推,而是把肩颈紧绷到放松的变化拍清楚。",
],
- "转化收口": [
+ cta: [
"如果你也想把肩颈放松变成日常习惯,可以先从这台 SKG 开始。",
"最后用清晰产品特写和轻松状态收住,让用户知道现在就可以入手。",
],
- "节奏承接": [
+ bridge: [
"延续原片短句快节奏,把每一句都落到一个具体肩颈场景或产品动作。",
"这一句作为过渡,画面从痛点切到产品,让节奏继续往下走。",
],
}
- const list = variants[role] ?? variants["节奏承接"]
+ const list = variants[role] ?? variants.bridge
return list[index % list.length]
}
-function buildVisualPlan(role: string) {
- if (role === "开场钩子") return "竖屏近景口播开场,人物轻揉脖子或转动肩颈,直接建立疲惫感。"
- if (role === "痛点推进") return "沿用原视频的表情、手势和节奏,画面强调低头、久坐、肩颈紧绷。"
- if (role === "利益证明") return "产品进入画面并佩戴到后颈,切到肩颈贴合、按键、热敷/揉按感的细节。"
- if (role === "转化收口") return "产品清晰特写 + 人物放松表情收尾,保留信息流广告的快速行动感。"
+function buildVisualPlan(role: AudioStoryboardRole) {
+ if (role === "hook") return "Vertical close-up creator opening. The subject gently rubs the neck or rotates the shoulders to establish fatigue immediately."
+ if (role === "pain") return "Keep the source expression, gesture rhythm, and fast pacing while emphasizing phone posture, desk sitting, and neck-and-shoulder tension."
+ if (role === "proof") return "Bring the product into frame and place it around the back of the neck, then cut to fit, button, warmth, and kneading-comfort details."
+ if (role === "cta") return "End with a clean product detail plus a relaxed expression, keeping the quick action feeling of a feed ad."
+ return "Keep the source-style composition and camera movement, but replace the content with an SKG neck-and-shoulder relaxation scene."
+}
+
+function buildVisualPlanZh(role: AudioStoryboardRole) {
+ if (role === "hook") return "竖屏近景口播开场,人物轻揉脖子或转动肩颈,直接建立疲惫感。"
+ if (role === "pain") return "沿用原视频的表情、手势和节奏,画面强调低头、久坐、肩颈紧绷。"
+ if (role === "proof") return "产品进入画面并佩戴到后颈,切到肩颈贴合、按键、热敷/揉按感的细节。"
+ if (role === "cta") return "产品清晰特写 + 人物放松表情收尾,保留信息流广告的快速行动感。"
return "保持原片同类构图和运镜,把画面内容替换成 SKG 肩颈放松场景。"
}
-function visualModeDefaults(mode: StoryboardVisualMode) {
+function visualModeDefaults(mode: StoryboardVisualMode, language: "en" | "zh" = "en") {
if (mode === "person_only") {
return {
needsProduct: false,
needsSubject: true,
- productPlacement: "本条不出现产品,只用人物状态、痛点或口播承接节奏;不要硬插 SKG 产品。",
+ productPlacement: language === "zh"
+ ? "本条不出现产品,只用人物状态、痛点或口播承接节奏;不要硬插 SKG 产品。"
+ : "Do not show the product in this beat. Use the subject's state, pain point, or voice-over performance to carry the rhythm; do not force in the SKG product.",
}
}
if (mode === "product_only") {
return {
needsProduct: true,
needsSubject: false,
- productPlacement: "只展示 SKG 肩颈按摩仪本体、佩戴角度或功能细节;不要强行加入人物。",
+ productPlacement: language === "zh"
+ ? "只展示 SKG 肩颈按摩仪本体、佩戴角度或功能细节;不要强行加入人物。"
+ : "Show only the SKG neck-and-shoulder massager, wearing angle, or functional detail; do not force a main character into this beat.",
}
}
if (mode === "environment") {
return {
needsProduct: false,
needsSubject: false,
- productPlacement: "本条作为场景/情绪/节奏过渡,不出现产品和人物主体;只保留空间、光线和运动节奏。",
+ productPlacement: language === "zh"
+ ? "本条作为场景/情绪/节奏过渡,不出现产品和人物主体;只保留空间、光线和运动节奏。"
+ : "Use this beat as a scene, mood, or pacing transition. Do not show the product or main subject; keep only space, light, and motion rhythm.",
}
}
return {
needsProduct: true,
needsSubject: true,
- productPlacement: "SKG 肩颈按摩仪作为外置佩戴产品出现,围绕拿起、佩戴、调整、按键或放松状态展开。",
+ productPlacement: language === "zh"
+ ? "SKG 肩颈按摩仪作为外置佩戴产品出现,围绕拿起、佩戴、调整、按键或放松状态展开。"
+ : "Show the SKG neck-and-shoulder massager as an external wearable product, built around picking it up, wearing it, adjusting it, pressing controls, or relaxing with it.",
}
}
-function visualModeForRole(role: string): StoryboardVisualMode {
- if (role === "开场钩子" || role === "痛点推进") return "person_only"
- if (role === "转化收口") return "product_only"
- if (role === "节奏承接") return "environment"
+function visualModeForRole(role: AudioStoryboardRole): StoryboardVisualMode {
+ if (role === "hook" || role === "pain") return "person_only"
+ if (role === "cta") return "product_only"
+ if (role === "bridge") return "environment"
return "person_product"
}
-function buildFirstFramePlan(role: string) {
- if (role === "开场钩子") return "人物近景看向镜头或低头办公,手轻扶后颈,画面先不露产品。"
- if (role === "痛点推进") return "保留原片人物动作节奏,肩颈紧绷、低头、揉脖子或久坐状态明确。"
- if (role === "利益证明") return "人物拿起或准备佩戴 SKG 肩颈按摩仪,产品位置清晰但动作刚开始。"
- if (role === "方案过渡") return "人物从痛点状态切到拿起产品/靠近肩颈,准备进入使用动作。"
- if (role === "转化收口") return "产品干净特写或佩戴完成后的稳定画面,留出转化收口的视觉焦点。"
+function buildFirstFramePlan(role: AudioStoryboardRole) {
+ if (role === "hook") return "Close-up subject looking at camera or working with head down, one hand lightly touching the back of the neck, with no product visible yet."
+ if (role === "pain") return "Preserve the source action rhythm while making neck tension, looking down, neck rubbing, or desk-sitting posture clear."
+ if (role === "proof") return "The subject picks up or prepares to wear the SKG neck-and-shoulder massager; product position is clear but the action has just started."
+ if (role === "solution") return "Move from the pain state into picking up the product or bringing it toward the neck and shoulders, ready to begin use."
+ if (role === "cta") return "Clean product close-up or stable worn-product frame, leaving a strong visual focus for the conversion close."
+ return "Start from the current source sentence's composition to carry the rhythm without forcing a subject change."
+}
+
+function buildFirstFramePlanZh(role: AudioStoryboardRole) {
+ if (role === "hook") return "人物近景看向镜头或低头办公,手轻扶后颈,画面先不露产品。"
+ if (role === "pain") return "保留原片人物动作节奏,肩颈紧绷、低头、揉脖子或久坐状态明确。"
+ if (role === "proof") return "人物拿起或准备佩戴 SKG 肩颈按摩仪,产品位置清晰但动作刚开始。"
+ if (role === "solution") return "人物从痛点状态切到拿起产品/靠近肩颈,准备进入使用动作。"
+ if (role === "cta") return "产品干净特写或佩戴完成后的稳定画面,留出转化收口的视觉焦点。"
return "按原视频当前句的构图启动,先承接节奏,不强行改变镜头主体。"
}
-function buildLastFramePlan(role: string) {
- if (role === "开场钩子") return "人物抬头或表情更集中,给下一镜产品或方案进入留出空间。"
- if (role === "痛点推进") return "紧绷状态被放大到一个明确停点,准备切入产品解决方案。"
- if (role === "利益证明") return "产品已正确佩戴在后颈/肩颈位置,人物放松,产品比例稳定。"
- if (role === "方案过渡") return "产品贴合肩颈,手部调整完成,画面自然进入功能细节或放松状态。"
- if (role === "转化收口") return "产品或佩戴状态稳定收住,画面干净,适合后续接购买/行动号召。"
+function buildLastFramePlan(role: AudioStoryboardRole) {
+ if (role === "hook") return "The subject lifts the head or becomes more focused, leaving room for the product or solution to enter in the next beat."
+ if (role === "pain") return "Amplify the tense state into a clear stopping point, ready to cut into the product solution."
+ if (role === "proof") return "The product is correctly worn around the back of the neck and shoulders, the subject looks more relaxed, and product scale is stable."
+ if (role === "solution") return "The product fits against the neck and shoulders, hand adjustment is complete, and the frame can move into functional detail or relaxation."
+ if (role === "cta") return "Hold a stable product or worn-product frame with clean composition, ready for purchase or action-call continuation."
+ return "Advance the action slightly and hold a stable endpoint that connects naturally to the next sentence."
+}
+
+function buildLastFramePlanZh(role: AudioStoryboardRole) {
+ if (role === "hook") return "人物抬头或表情更集中,给下一镜产品或方案进入留出空间。"
+ if (role === "pain") return "紧绷状态被放大到一个明确停点,准备切入产品解决方案。"
+ if (role === "proof") return "产品已正确佩戴在后颈/肩颈位置,人物放松,产品比例稳定。"
+ if (role === "solution") return "产品贴合肩颈,手部调整完成,画面自然进入功能细节或放松状态。"
+ if (role === "cta") return "产品或佩戴状态稳定收住,画面干净,适合后续接购买/行动号召。"
return "动作小幅推进并稳定停住,保留与下一句衔接的方向感。"
}
-function buildSubjectDescription(role: string, visualMode: StoryboardVisualMode) {
+function buildSubjectDescription(role: AudioStoryboardRole, visualMode: StoryboardVisualMode) {
+ if (visualMode === "product_only" || visualMode === "environment") return ""
+ const base = "Consistent similar subject: a friendly transparent or semi-transparent humanoid with visible clean white skeleton inside, commercial not horror, with neck, collarbone, and upper-back areas clear for wearing a neck-and-shoulder massager."
+ if (role === "hook") return `${base} Front or upper-body creator speaking state, with a pain-point or curious expression that grabs attention quickly.`
+ if (role === "pain") return `${base} Neck-and-shoulder tension, looking down, desk posture, or rubbing the neck; make the neck line, shoulders, and upper back readable.`
+ if (role === "proof") return `${base} Relaxed state while wearing or about to wear the product, prioritizing neck-and-shoulder close-up, side, and back-neck angles.`
+ if (role === "solution") return `${base} Hands adjust the product or show wearable fit naturally; product placement must not hide important anatomy or device structure.`
+ if (role === "cta") return `${base} Stable, relaxed, clean ending state using front, three-quarter, or stable worn-product framing.`
+ return `${base} Keep one consistent subject identity, material, body type, gender presentation, and commercial mood across the whole video.`
+}
+
+function buildSubjectDescriptionZh(role: AudioStoryboardRole, visualMode: StoryboardVisualMode) {
if (visualMode === "product_only" || visualMode === "environment") return ""
const base = "统一相似主体:透明或半透明皮肤包裹可见白色骨架的人形,广告感、非恐怖、肩颈/锁骨/上背区域清晰,适合佩戴肩颈按摩仪。"
- if (role === "开场钩子") return `${base} 正面或半身口播状态,表情有痛点或好奇感,能快速抓住注意。`
- if (role === "痛点推进") return `${base} 肩颈紧绷、低头久坐或按揉脖子的状态,重点看清脖子、肩线和上背。`
- if (role === "利益证明") return `${base} 产品佩戴或即将佩戴的放松状态,优先肩颈近景、侧面和后颈肩背角度。`
- if (role === "方案过渡") return `${base} 手部调整产品或展示佩戴贴合感,人物姿态自然,产品位置不能挡住关键结构。`
- if (role === "转化收口") return `${base} 状态稳定、放松、干净收尾,可用正面/三分之二视角或产品佩戴后的稳定状态。`
+ if (role === "hook") return `${base} 正面或半身口播状态,表情有痛点或好奇感,能快速抓住注意。`
+ if (role === "pain") return `${base} 肩颈紧绷、低头久坐或按揉脖子的状态,重点看清脖子、肩线和上背。`
+ if (role === "proof") return `${base} 产品佩戴或即将佩戴的放松状态,优先肩颈近景、侧面和后颈肩背角度。`
+ if (role === "solution") return `${base} 手部调整产品或展示佩戴贴合感,人物姿态自然,产品位置不能挡住关键结构。`
+ if (role === "cta") return `${base} 状态稳定、放松、干净收尾,可用正面/三分之二视角或产品佩戴后的稳定状态。`
return `${base} 保持与整片一致的主体身份、材质、体型、性别表现和广告气质。`
}
function buildAudioStoryboardRows(job: Job | null): AudioStoryboardRow[] {
if (!job?.transcript.length) return []
return job.transcript.map((segment, index) => {
- const source = segment.zh?.trim() || segment.en?.trim() || "原音频文案待补充"
+ const source = segment.en?.trim() || segment.zh?.trim() || "Source audio script pending."
+ const sourceZh = segment.zh?.trim() || segment.en?.trim() || "原音频文案待补充"
const role = classifyAudioRole(`${segment.en} ${segment.zh}`, index, job.transcript.length)
const visualMode = visualModeForRole(role)
const defaults = visualModeDefaults(visualMode)
+ const defaultsZh = visualModeDefaults(visualMode, "zh")
+ const keyElements = role === "proof"
+ ? "wearing action, product position, hand pressing the control, relaxed expression"
+ : "creator framing, subject gesture, facial rhythm, scene lighting"
+ const keyElementsZh = role === "proof"
+ ? "佩戴动作、产品位置、手部按键、放松表情"
+ : "口播构图、人物动作、表情节奏、场景光线"
return {
index: segment.index,
start: segment.start,
end: segment.end,
source,
+ sourceZh,
role,
visualMode,
needsProduct: defaults.needsProduct,
needsSubject: defaults.needsSubject,
subjectDescription: buildSubjectDescription(role, visualMode),
+ subjectDescriptionZh: buildSubjectDescriptionZh(role, visualMode),
skgCopy: buildSkgCopy(role, index),
+ skgCopyZh: buildSkgCopyZh(role, index),
visualPlan: buildVisualPlan(role),
+ visualPlanZh: buildVisualPlanZh(role),
firstFramePlan: buildFirstFramePlan(role),
+ firstFramePlanZh: buildFirstFramePlanZh(role),
lastFramePlan: buildLastFramePlan(role),
- referencePlan: `从原视频 ${segment.start.toFixed(1)}-${segment.end.toFixed(1)}s 定向抽 1-2 张参考帧。`,
- keyElements: role === "利益证明" ? "佩戴动作、产品位置、手部按键、放松表情" : "口播构图、人物动作、表情节奏、场景光线",
- productIntegration: "把原片产品/道具语境替换为 SKG 白色 U 形颈部按摩仪,产品必须外置佩戴在肩颈位置。",
+ lastFramePlanZh: buildLastFramePlanZh(role),
+ referencePlan: `Extract 1-2 targeted reference frames from source video ${segment.start.toFixed(1)}-${segment.end.toFixed(1)}s.`,
+ keyElements,
+ keyElementsZh,
+ productIntegration: "Replace the source product or prop context with the SKG white U-shaped neck-and-shoulder massager. The product must be worn externally around the neck and shoulders.",
+ productIntegrationZh: "把原片产品/道具语境替换为 SKG 白色 U 形颈部按摩仪,产品必须外置佩戴在肩颈位置。",
productPlacement: defaults.productPlacement,
+ productPlacementZh: defaultsZh.productPlacement,
}
})
}
@@ -1173,14 +1347,14 @@ function productReferenceNotes(items: ProductRefItem[]) {
if (!items.length) return ""
return items
.map((item, index) => {
- const tags = item.useTags.map((tag) => PRODUCT_USE_TAG_LABELS[tag]).filter(Boolean).join("/")
+ const tags = item.useTags.map((tag) => PRODUCT_USE_TAG_PROMPT_LABELS[tag] ?? tag).filter(Boolean).join(", ")
const orientation = formatProductOrientation(item.orientation)
- const direction = orientation ? `;方向:${orientation}` : ""
- const landmarks = item.landmarks.length ? `;结构:${item.landmarks.join("/")}` : ""
- const risk = item.risk ? `;风险:${item.risk}` : ""
- return `${index + 1}. ${productViewLabel(item.view)}|${productBackgroundLabel(item.background)}|${tags}:${item.note || "无补充备注"}${direction}${landmarks}${risk}`
+ const direction = orientation ? `; orientation: ${orientation}` : ""
+ const landmarks = item.landmarks.length ? `; structural landmarks: ${item.landmarks.join(", ")}` : ""
+ const risk = item.risk ? `; risk: ${item.risk}` : ""
+ return `${index + 1}. ${PRODUCT_VIEW_PROMPT_LABELS[item.view] ?? item.view} | ${PRODUCT_BACKGROUND_PROMPT_LABELS[item.background] ?? item.background} | ${tags || "general product reference"}: ${item.note || "no extra note"}${direction}${landmarks}${risk}`
})
- .join(";")
+ .join("; ")
}
function savedScenePatch(scene?: StoryboardScene | null): RowPlanPatch {
@@ -1189,11 +1363,11 @@ function savedScenePatch(scene?: StoryboardScene | null): RowPlanPatch {
visualMode: scene.visual_mode,
needsProduct: scene.needs_product,
needsSubject: scene.needs_subject,
- subjectDescription: scene.subject?.split("\n").find((line) => line.trim() && !line.startsWith("主体真源") && !line.startsWith("本条不需要"))?.trim(),
- visualPlan: scene.scene?.split("\n").find((line) => line.trim() && !line.startsWith("镜头类型") && !line.startsWith("首帧规划") && !line.startsWith("尾帧规划") && !line.startsWith("原音频依据"))?.trim(),
+ subjectDescription: scene.subject?.split("\n").find((line) => line.trim() && !line.startsWith("Subject source") && !line.startsWith("No main subject") && !line.startsWith("主体真源") && !line.startsWith("本条不需要"))?.trim(),
+ visualPlan: scene.scene?.split("\n").find((line) => line.trim() && !line.startsWith("Visual mode") && !line.startsWith("First-frame plan") && !line.startsWith("Last-frame plan") && !line.startsWith("Source audio reference") && !line.startsWith("镜头类型") && !line.startsWith("首帧规划") && !line.startsWith("尾帧规划") && !line.startsWith("原音频依据"))?.trim(),
firstFramePlan: scene.first_frame_plan,
lastFramePlan: scene.last_frame_plan,
- productIntegration: scene.product?.split("\n").find((line) => line.trim() && !line.startsWith("产品需求") && !line.startsWith("产品出现方式") && !line.startsWith("产品素材池") && !line.startsWith("未上传产品图") && !line.startsWith("本条规划"))?.trim(),
+ productIntegration: scene.product?.split("\n").find((line) => line.trim() && !line.startsWith("Product requirement") && !line.startsWith("Product placement") && !line.startsWith("Product reference pool") && !line.startsWith("No product") && !line.startsWith("This beat") && !line.startsWith("产品需求") && !line.startsWith("产品出现方式") && !line.startsWith("产品素材池") && !line.startsWith("未上传产品图") && !line.startsWith("本条规划"))?.trim(),
productPlacement: scene.product_placement,
}
}
@@ -1206,34 +1380,40 @@ function applyPlanPatch(row: AudioStoryboardRow, patch?: RowPlanPatch): AudioSto
needsProduct: patch.needsProduct ?? row.needsProduct,
needsSubject: patch.needsSubject ?? row.needsSubject,
subjectDescription: patch.subjectDescription ?? row.subjectDescription,
+ subjectDescriptionZh: patch.subjectDescriptionZh ?? row.subjectDescriptionZh,
visualPlan: patch.visualPlan ?? row.visualPlan,
+ visualPlanZh: patch.visualPlanZh ?? row.visualPlanZh,
firstFramePlan: patch.firstFramePlan ?? row.firstFramePlan,
+ firstFramePlanZh: patch.firstFramePlanZh ?? row.firstFramePlanZh,
lastFramePlan: patch.lastFramePlan ?? row.lastFramePlan,
+ lastFramePlanZh: patch.lastFramePlanZh ?? row.lastFramePlanZh,
productIntegration: patch.productIntegration ?? row.productIntegration,
+ productIntegrationZh: patch.productIntegrationZh ?? row.productIntegrationZh,
productPlacement: patch.productPlacement ?? row.productPlacement,
+ productPlacementZh: patch.productPlacementZh ?? row.productPlacementZh,
}
}
function productPriorityForRow(row: AudioStoryboardRow) {
- const viewPriorityByRole: Record = {
- "开场钩子": ["front", "left_45", "right_45", "side_thickness"],
- "痛点推进": ["front", "side_thickness", "left_45", "right_45"],
- "利益证明": ["inner_contacts", "side_thickness", "front", "left_45", "right_45", "back_bottom"],
- "方案过渡": ["front", "left_45", "right_45", "inner_contacts", "side_thickness"],
- "转化收口": ["front", "back_bottom", "left_45", "right_45", "inner_contacts"],
- "节奏承接": ["front", "left_45", "right_45", "side_thickness"],
+ const viewPriorityByRole: Record = {
+ hook: ["front", "left_45", "right_45", "side_thickness"],
+ pain: ["front", "side_thickness", "left_45", "right_45"],
+ proof: ["inner_contacts", "side_thickness", "front", "left_45", "right_45", "back_bottom"],
+ solution: ["front", "left_45", "right_45", "inner_contacts", "side_thickness"],
+ cta: ["front", "back_bottom", "left_45", "right_45", "inner_contacts"],
+ bridge: ["front", "left_45", "right_45", "side_thickness"],
}
- const tagPriorityByRole: Record = {
- "开场钩子": ["hero_packshot", "asymmetry", "side_thickness"],
- "痛点推进": ["wearing_scale", "side_thickness", "hero_packshot"],
- "利益证明": ["inner_contact", "wearing_scale", "button_detail", "side_thickness"],
- "方案过渡": ["wearing_scale", "hero_packshot", "inner_contact"],
- "转化收口": ["hero_packshot", "back_bottom", "asymmetry", "material_texture"],
- "节奏承接": ["hero_packshot", "asymmetry", "side_thickness"],
+ const tagPriorityByRole: Record = {
+ hook: ["hero_packshot", "asymmetry", "side_thickness"],
+ pain: ["wearing_scale", "side_thickness", "hero_packshot"],
+ proof: ["inner_contact", "wearing_scale", "button_detail", "side_thickness"],
+ solution: ["wearing_scale", "hero_packshot", "inner_contact"],
+ cta: ["hero_packshot", "back_bottom", "asymmetry", "material_texture"],
+ bridge: ["hero_packshot", "asymmetry", "side_thickness"],
}
return {
- views: viewPriorityByRole[row.role] ?? viewPriorityByRole["节奏承接"],
- tags: tagPriorityByRole[row.role] ?? tagPriorityByRole["节奏承接"],
+ views: viewPriorityByRole[row.role] ?? viewPriorityByRole.bridge,
+ tags: tagPriorityByRole[row.role] ?? tagPriorityByRole.bridge,
}
}
@@ -1245,17 +1425,17 @@ function endpointProductPriority(row: AudioStoryboardRow, role?: "first_frame" |
if (!views.includes(view)) views.push(view)
if (tag && !tags.includes(tag)) tags.push(tag)
}
- if (/后颈|肩背|背面|背部|后背|上背|尾帧|佩戴完成|贴合完成/.test(text)) add("back_bottom", "back_bottom")
- if (/侧面|侧身|厚度|侧厚|体积|左侧|右侧|45|调整|拿起|靠近肩颈/.test(text)) add("side_thickness", "side_thickness")
- if (/内侧|触点|按摩头|贴颈|接触|皮肤接触/.test(text)) add("inner_contacts", "inner_contact")
- if (/佩戴比例|上身|真人佩戴|脖子|肩颈|锁骨/.test(text)) add("left_45", "wearing_scale")
- if (/按键|按钮|控制|开关|logo/.test(text)) add("right_45", "button_detail")
+ if (/back neck|neck back|upper back|back view|back side|shoulder blade|last frame|worn|wearing complete|fit complete|后颈|肩背|背面|背部|后背|上背|尾帧|佩戴完成|贴合完成/.test(text)) add("back_bottom", "back_bottom")
+ if (/side|profile|thickness|volume|left side|right side|45|adjust|pick up|bring.*neck|toward.*shoulder|侧面|侧身|厚度|侧厚|体积|左侧|右侧|调整|拿起|靠近肩颈/.test(text)) add("side_thickness", "side_thickness")
+ if (/inner|contact pad|massage head|touching skin|neck contact|skin contact|内侧|触点|按摩头|贴颈|接触|皮肤接触/.test(text)) add("inner_contacts", "inner_contact")
+ if (/wearing scale|upper body|worn on human|neck|shoulder|collarbone|佩戴比例|上身|真人佩戴|脖子|肩颈|锁骨/.test(text)) add("left_45", "wearing_scale")
+ if (/button|control|switch|logo|按键|按钮|控制|开关/.test(text)) add("right_45", "button_detail")
return { views, tags }
}
function endpointProductMaxForRow(row: AudioStoryboardRow, role?: "first_frame" | "last_frame") {
const text = `${row.visualPlan} ${row.firstFramePlan} ${row.lastFramePlan} ${row.productIntegration} ${row.productPlacement} ${role ?? ""}`.toLowerCase()
- return /侧面|侧身|厚度|侧厚|后颈|肩背|背面|背部|内侧|触点|按摩头|贴颈|特写|近景|按键|按钮|佩戴完成|上背/.test(text)
+ return /side|profile|thickness|back neck|upper back|back view|inner|contact pad|massage head|neck contact|close-up|closeup|button|control|correctly worn|worn-product|worn product|wearing complete|侧面|侧身|厚度|侧厚|后颈|肩背|背面|背部|内侧|触点|按摩头|贴颈|特写|近景|按键|按钮|佩戴完成|上背/.test(text)
? MAX_PRODUCT_REFS_PER_ENDPOINT
: 1
}
@@ -1336,26 +1516,42 @@ function subjectViewRoleHint(view: string) {
return hints[view] ?? "主体参考视角"
}
+function subjectViewPromptHint(view: string) {
+ const hints: Record = {
+ front: "front speaking shot, opening hook, expression, conversion close",
+ three_quarter_left: "left three-quarter angle, talking, pre-wear motion, natural turn",
+ three_quarter_right: "right three-quarter angle, talking, pre-wear motion, natural turn",
+ left: "left side, neck-and-shoulder side profile, wearing action, product thickness and position",
+ right: "right side, neck-and-shoulder side profile, wearing action, product thickness and position",
+ back: "back view, back neck and upper shoulders, product placement landing",
+ bust_front: "front neck-and-shoulder close-up, pain-point expression, wearing scale",
+ bust_left_45: "left three-quarter neck-and-shoulder close-up, hand adjustment, wearable fit",
+ bust_right_45: "right three-quarter neck-and-shoulder close-up, hand adjustment, wearable fit",
+ back_neck_detail: "back-neck and upper-back detail, contact-pad position, product fit",
+ }
+ return hints[view] ?? "subject reference view"
+}
+
function subjectDescriptionForRow(row: AudioStoryboardRow, subjectRefs: SubjectPlanningRef[]) {
const trimmed = row.subjectDescription.trim()
if (trimmed) return trimmed
- const labels = subjectRefs.slice(0, 4).map((ref) => ref.label || subjectViewLabel(ref.view)).join("、")
+ const labels = subjectRefs.slice(0, 4).map((ref) => ref.label || subjectViewLabel(ref.view)).join(", ")
return [
- "统一相似主体:使用已生成的主体视图作为人物真源,保持同一人物身份、体型、材质、年龄段、性别表现和广告气质。",
- labels ? `可用主体视角:${labels}。` : "",
- "如果本条需要人物但缺少更具体描述,默认保持透明皮肤包裹白色骨架、非恐怖、肩颈区域清晰可佩戴产品。",
+ "Consistent similar subject: use the generated subject view pack as the character truth, maintaining one identity, body proportion, material, age range, gender presentation, and commercial mood. ",
+ labels ? `Available subject views: ${labels}. ` : "",
+ "If this beat needs a subject but lacks a specific description, default to a friendly transparent skin shell with visible white skeleton, non-horror, with clear neck and shoulder area for wearable product placement.",
].filter(Boolean).join("")
}
function subjectPriorityForRow(row: AudioStoryboardRow, role?: "first_frame" | "last_frame") {
const text = `${row.role} ${row.visualMode} ${row.subjectDescription} ${row.visualPlan} ${row.firstFramePlan} ${row.lastFramePlan} ${row.productPlacement}`.toLowerCase()
- if (/后颈|肩背|上背|背面|背部|贴合|佩戴完成|已正确佩戴/.test(text)) {
+ if (/back neck|upper back|shoulder blade|back view|fit|worn|wearing complete|correctly worn|后颈|肩背|上背|背面|背部|贴合|佩戴完成|已正确佩戴/.test(text)) {
return ["back_neck_detail", "back", "bust_left_45", "bust_right_45", "left", "right", "bust_front", "three_quarter_left", "three_quarter_right", "front"]
}
- if (/侧面|左侧|右侧|45|调整|拿起|准备佩戴|靠近肩颈|手部/.test(text)) {
+ if (/\bside\b|left|right|45|adjust|pick up|prepare to wear|toward.*neck|hand|侧面|左侧|右侧|调整|拿起|准备佩戴|靠近肩颈|手部/.test(text)) {
return ["bust_left_45", "bust_right_45", "left", "right", "three_quarter_left", "three_quarter_right", "bust_front", "front", "back_neck_detail", "back"]
}
- if (/近景|半身|肩颈|锁骨|脖子|揉脖子|低头|紧绷/.test(text)) {
+ if (/close-up|closeup|upper-body|bust|neck|shoulder|collarbone|rubbing.*neck|looking down|tense|tension|近景|半身|肩颈|锁骨|脖子|揉脖子|低头|紧绷/.test(text)) {
return ["bust_front", "bust_left_45", "bust_right_45", "front", "three_quarter_left", "three_quarter_right", "left", "right", "back_neck_detail", "back"]
}
if (role === "last_frame" && row.needsProduct) {
@@ -1371,8 +1567,8 @@ function selectSubjectRefsForRow(row: AudioStoryboardRow, refs: SubjectPlanningR
.map((ref, index) => {
const rank = priority.indexOf(ref.view)
const labelText = `${ref.label || ""} ${ref.roleHint}`.toLowerCase()
- const closeupScore = /肩颈|后颈|近景|贴合|佩戴/.test(row.visualPlan + row.firstFramePlan + row.lastFramePlan + row.productPlacement)
- && /bust|neck|近景|肩颈|后颈/.test(`${ref.view} ${labelText}`)
+ const closeupScore = /neck|shoulder|back neck|close-up|closeup|fit|wear|佩戴|肩颈|后颈|近景|贴合/.test(row.visualPlan + row.firstFramePlan + row.lastFramePlan + row.productPlacement)
+ && /bust|neck|close-up|closeup|近景|肩颈|后颈/.test(`${ref.view} ${labelText}`)
? 12
: 0
return { ref, score: (rank >= 0 ? 100 - rank * 8 : 0) + closeupScore - index }
@@ -1383,7 +1579,7 @@ function selectSubjectRefsForRow(row: AudioStoryboardRow, refs: SubjectPlanningR
}
function subjectReferenceNotes(refs: SubjectPlanningRef[]) {
- return refs.map((ref, index) => `${index + 1}. ${ref.label || subjectViewLabel(ref.view)}|${ref.roleHint}`).join(";")
+ return refs.map((ref, index) => `${index + 1}. ${ref.label || subjectViewLabel(ref.view)} | ${subjectViewPromptHint(ref.view)}`).join("; ")
}
function subjectAssetRefsForPlanning(source: { frame: KeyFrame; element: KeyElement } | null): SubjectPlanningRef[] {
@@ -1432,19 +1628,20 @@ function buildEndpointFramePrompt(row: AudioStoryboardRow, role: "first_frame" |
const opposite = role === "first_frame" ? row.lastFramePlan : row.firstFramePlan
const productNotes = selectedProductItems.length ? productReferenceNotes(selectedProductItems) : ""
return [
- `分镜 ${row.index + 1} ${role === "first_frame" ? "首帧" : "尾帧"}。`,
- `新口播文案:${row.skgCopy}`,
- `镜头类型:${VISUAL_MODE_OPTIONS.find((item) => item.value === row.visualMode)?.label ?? row.visualMode}。`,
- `当前要生成的画面:${target}`,
- `另一端画面用于连续性参考:${opposite}`,
- `画面规划:${row.visualPlan}`,
+ `Storyboard beat ${row.index + 1}, ${role === "first_frame" ? "first frame" : "last frame"}.`,
+ `New English voice-over line: ${row.skgCopy}`,
+ `Narrative role: ${ROLE_LABELS_EN[row.role]}.`,
+ `Visual mode: ${row.visualMode}.`,
+ `Target endpoint frame to generate now: ${target}`,
+ `Opposite endpoint continuity reference: ${opposite}`,
+ `Overall visual plan: ${row.visualPlan}`,
row.needsSubject
- ? `人物主体 brief:${subjectBrief || "主体 brief 暂缺,请保持一个统一的商业广告主体,肩颈区域清晰可佩戴产品。"}。主体只依据这段文字身份描述,不上传主体参考图;可以根据本镜头自由改变动作、景别、表情和环境,但不能换成另一个人设。不要回到原视频关键帧复刻人物。`
- : "本条不需要主角人物;如出现人物,只能是局部手部、背影或环境人物,不要生成透明骨架主角。",
+ ? `Subject identity brief: ${subjectBrief || "Subject brief is missing. Keep one unified commercial ad subject with clear neck-and-shoulder area for product placement"}. Use only this text identity brief; no subject reference image is uploaded. The subject may freely change pose, framing, expression, gesture, and environment for this shot, but must not become a different character. Do not copy the original source-video person or keyframe.`
+ : "This beat does not need a main character. If people appear, they should only be partial hands, back-view background figures, or environmental figures; do not generate the transparent skeleton main subject.",
row.needsProduct
- ? `产品融入:${row.productPlacement}。${row.productIntegration}。本次只提供 ${selectedProductItems.length} 张同一 SKG 肩颈按摩仪产品硬参考;${productNotes}。产品是套在脖子上的 U 形肩颈按摩仪,必须保持真实佩戴大小、左右非对称、按键、触点、厚度和贴颈位置。`
- : "本条不露出产品,不要强行生成 SKG 产品、包装、白底图或随机商品。",
- "输出一张单独的 9:16 高清首/尾帧,不要拼图,不要字幕,不要平台 UI,不要水印。画面要能作为后续视频生成的明确起止帧。",
+ ? `Product integration: ${row.productPlacement}. ${row.productIntegration}. This request provides ${selectedProductItems.length} rigid reference image(s) of the same SKG neck-and-shoulder massager: ${productNotes}. The product is a U-shaped wearable device worn around the neck and shoulders. Preserve realistic wearable scale, left-right asymmetry, button placement, contact pads, side thickness, and neck-contact position.`
+ : "Do not show the product in this beat. Do not force-generate an SKG product, package, white-background product image, or random merchandise.",
+ "Output one single 9:16 high-definition endpoint frame. No contact sheet, no multiple views, no subtitles, no platform UI, no watermark. The image must work as a clear first/last frame for downstream video generation.",
].join("\n")
}
@@ -1462,10 +1659,10 @@ function buildStoryboardSceneFromAudioRow(
const subjectNotes = subjectReferenceNotes(subjectRefs)
const subjectBrief = subjectBriefForEndpoint(row, subjectRefs)
const productGuidance = !row.needsProduct
- ? "本条规划为不露出产品或不把产品作为画面主体;视频生成时不要硬插 SKG 产品、包装、白底图或错误商品。"
+ ? "This beat is planned without product visibility or without product as the visual subject. Do not force-insert an SKG product, package, white-background product render, or incorrect merchandise during video generation."
: productItems.length
- ? `产品素材池共有 ${productItems.length} 张,本条只选用 ${selectedProductItems.length} 张最相关参考图,不要把未选素材混入本条画面。产品硬定义:这是套在脖子上的 U 形肩颈按摩仪,不是耳机、头戴设备或护颈枕。坐标系硬规则:左/右按佩戴者身体左右,不能按图片左右;上=靠近下巴/脸/颈部上沿,下=靠近锁骨/肩部下沿;内侧=贴颈皮肤/按摩触点,外侧=外壳/按键/Logo。所选图片只作为产品结构、角度、比例和细节参考,不要照搬参考图的白底/黑底/棚拍背景。视角标注:${notes}。保留左右非对称细节,不要把两边做成镜像对称;肩颈产品大小必须贴近真实佩戴比例,不能缩成耳机,也不能放大成护颈枕。`
- : "未上传产品图时使用默认 SKG 产品图;生成前建议先建立同一产品素材池,锁定左右差异、厚度和佩戴比例。"
+ ? `The product pool has ${productItems.length} image(s); this beat selects only the ${selectedProductItems.length} most relevant reference image(s). Do not mix unselected assets into this shot. Rigid product definition: this is a U-shaped neck-and-shoulder wearable massager, not headphones, a headset, or a neck pillow. Coordinate rule: left/right refer to the wearer's body, not the image; top means closer to chin/face/upper neck, bottom means closer to collarbone/shoulders; inner means skin-contact side and massage pads, outer means shell/buttons/logo. Selected images are only product structure, angle, scale, and detail references; do not copy the white/black/studio background. View notes: ${notes}. Preserve left-right asymmetry; do not mirror the two sides. The shoulder-neck product size must match realistic wearing scale, not earphone-small and not neck-pillow-large.`
+ : "No product images are uploaded. Use the default SKG product concept only if needed, and preferably establish a same-product pool before generation to lock left-right differences, thickness, and wearing scale."
return {
duration: Number(Math.max(3.2, Math.min(6.5, row.end - row.start || 4.5)).toFixed(1)),
first_image: endpointRefs.firstImage ?? null,
@@ -1482,11 +1679,11 @@ function buildStoryboardSceneFromAudioRow(
subject_images: row.needsSubject ? subjectRefs : [],
subject_image: row.needsSubject ? subjectRefs[0] ?? null : null,
subject: row.needsSubject
- ? `${subjectDescription}\n主体动作/画面要素:${row.keyElements}\n主体真源:从已生成的相似主体白底视图中按本镜头需求选择 ${subjectRefs.length} 张;${subjectNotes}。关键帧只用于前置主体提取,不作为后续视频首尾帧参考。`
- : "本条不需要人物主体或相似主体参考;如画面里出现人物,只作为背景或局部,不作为主角。",
- scene: `镜头类型:${VISUAL_MODE_OPTIONS.find((item) => item.value === row.visualMode)?.label ?? row.visualMode}\n${row.visualPlan}\n首帧规划:${row.firstFramePlan}\n尾帧规划:${row.lastFramePlan}\n原音频依据:${row.source}`,
- product: `产品需求:${row.needsProduct ? "需要产品参考" : "本条不需要产品"}\n产品出现方式:${row.productPlacement}\n${row.needsProduct ? row.productIntegration : "本条以情绪、人物状态、空间或节奏过渡为主,不露出产品。"}\n${productGuidance}`,
- action: `${row.skgCopy}\n连续动作:从首帧规划自然过渡到尾帧规划,镜头类型和产品/人物需求不能中途改变。`,
+ ? `${subjectDescription}\nSubject action and visual elements: ${row.keyElements}\nSubject source: select ${subjectRefs.length} generated similar-subject view(s) according to this shot's need; ${subjectNotes}. Source keyframes are only used for upstream subject extraction and must not be used as direct endpoint-frame references.`
+ : "No main character or similar-subject reference is needed for this beat. If people appear, they should be background or partial-body context, not the main subject.",
+ scene: `Visual mode: ${row.visualMode}\n${row.visualPlan}\nFirst-frame plan: ${row.firstFramePlan}\nLast-frame plan: ${row.lastFramePlan}\nSource audio reference: ${row.source}`,
+ product: `Product requirement: ${row.needsProduct ? "product reference required" : "no product required for this beat"}\nProduct placement: ${row.productPlacement}\n${row.needsProduct ? row.productIntegration : "This beat focuses on emotion, subject state, space, or pacing transition and should not show the product."}\n${productGuidance}`,
+ action: `${row.skgCopy}\nContinuity action: transition naturally from the first-frame plan to the last-frame plan. The visual mode and product/subject requirements must not change mid-clip.`,
reference_ids: [],
}
}
@@ -3103,8 +3300,10 @@ function AudioStoryboardPlanPanel({
const [productAnalyzing, setProductAnalyzing] = useState(false)
const [productAngleBusy, setProductAngleBusy] = useState(null)
const [copyOverrides, setCopyOverrides] = useState<Record<number, string>>({})
+ const [copyZhOverrides, setCopyZhOverrides] = useState<Record<number, string>>({})
const [planOverrides, setPlanOverrides] = useState>({})
const [authorIntent, setAuthorIntent] = useState("")
+ const [showChineseMirror, setShowChineseMirror] = useState(true)
const [scriptRewriteBusy, setScriptRewriteBusy] = useState<"all" | number | null>(null)
const productFileRef = useRef(null)
const productPersistSeq = useRef(0)
@@ -3146,6 +3345,7 @@ function AudioStoryboardPlanPanel({
}
const copyForRow = (row: AudioStoryboardRow) => copyOverrides[row.index] ?? row.skgCopy
+ const copyZhForRow = (row: AudioStoryboardRow) => copyZhOverrides[row.index] ?? row.skgCopyZh
const patchRowCopy = (rowIndex: number, value: string) => {
setCopyOverrides((prev) => ({ ...prev, [rowIndex]: value }))
@@ -3163,7 +3363,9 @@ function AudioStoryboardPlanPanel({
needsProduct: defaults.needsProduct,
needsSubject: defaults.needsSubject,
subjectDescription: row ? buildSubjectDescription(row.role, mode) : "",
+ subjectDescriptionZh: row ? buildSubjectDescriptionZh(row.role, mode) : "",
productPlacement: defaults.productPlacement,
+ productPlacementZh: visualModeDefaults(mode, "zh").productPlacement,
})
}
@@ -3328,7 +3530,7 @@ function AudioStoryboardPlanPanel({
await analyzeAndCompleteProductViews(productItems.map((item) => item.ref))
}
- const applyScriptRewriteItems = (items: Array<{ index: number; text: string }>) => {
+ const applyScriptRewriteItems = (items: Array<{ index: number; text: string; text_zh?: string }>) => {
if (!items.length) return
setCopyOverrides((prev) => {
const next = { ...prev }
@@ -3337,6 +3539,13 @@ function AudioStoryboardPlanPanel({
}
return next
})
+ setCopyZhOverrides((prev) => {
+ const next = { ...prev }
+ for (const item of items) {
+ if (item.text_zh?.trim()) next[item.index] = item.text_zh.trim()
+ }
+ return next
+ })
}
const rewriteSingleRow = async (row: AudioStoryboardRow) => {
@@ -3426,13 +3635,16 @@ function AudioStoryboardPlanPanel({
setEndpointFrameBusy(busyKey)
try {
await saveRowStoryboardDraft(plannedRow, frame)
+ const rawPrompt = buildEndpointFramePrompt(plannedRow, role, selectedProductItems, subjectBrief)
+ const prompt = await ensureEnglishForModel(rawPrompt)
+ const englishSubjectBrief = await ensureEnglishForModel(subjectBrief)
const updated = await generateSceneAsset(job.id, frame.index, {
size: SUBJECT_ASSET_SIZE,
scene_mode: "similar",
scene_style: "premium_product",
asset_role: role,
- prompt: buildEndpointFramePrompt(plannedRow, role, selectedProductItems, subjectBrief),
- subject_brief: subjectBrief,
+ prompt,
+ subject_brief: englishSubjectBrief,
product_images: selectedProductItems.map((item) => item.ref),
source_frame_indices: [],
})
@@ -3622,6 +3834,13 @@ function AudioStoryboardPlanPanel({
/>
+