feat: enforce english prompt language strategy

This commit is contained in:
2026-05-18 20:07:11 +08:00
parent adf8b2ba0a
commit bc0b010def
4 changed files with 560 additions and 205 deletions

View File

@@ -91,7 +91,7 @@ YTDLP_COOKIES_FILE = os.getenv("YTDLP_COOKIES_FILE", "").strip()
YTDLP_COOKIES_FROM_BROWSER = os.getenv("YTDLP_COOKIES_FROM_BROWSER", "").strip()
AUDIO_PRODUCT_BRIEF = os.getenv(
"AUDIO_PRODUCT_BRIEF",
"SKG 智能按摩产品,主打日常肩颈、腰背、眼部、膝盖或足部放松;广告表达要高级、干净、可信,不做医疗疗效承诺。",
"SKG smart massage products for everyday neck-and-shoulder, back, eye, knee, or foot relaxation. Ads should feel premium, clean, trustworthy, and must not make medical efficacy claims.",
).strip()
AUDIO_REWRITE_MODEL = gpt_model_env("AUDIO_REWRITE_MODEL", REWRITE_MODEL)
VOICE_PROVIDER = "azure_openai"
@@ -454,6 +454,7 @@ class CharacterLibraryItem(BaseModel):
folder: str = ""
description: str = ""
prompt_brief: str = ""
prompt_brief_zh: str = ""
primary_image: str = ""
images: list[CharacterLibraryImage] = Field(default_factory=list)
@@ -480,6 +481,7 @@ class SubjectTemplateItem(BaseModel):
description: str = ""
note: str = ""
prompt_brief: str = ""
prompt_brief_zh: str = ""
source: Literal["database"] = "database"
source_job_id: str = ""
source_frame_idx: int = -1
@@ -534,6 +536,7 @@ class KeyElement(BaseModel):
subject_kind: SubjectKind = "object"
subject_assets: list[SubjectAsset] = Field(default_factory=list)
subject_consensus_brief: str = ""
subject_consensus_brief_zh: str = ""
created_at: float = 0.0
@@ -565,6 +568,7 @@ class AudioScript(BaseModel):
source_text: str = ""
source_zh: str = ""
rewritten_text: str = ""
rewritten_text_zh: str = ""
speaker_profile: str = ""
rhythm_profile: str = ""
background_audio_profile: str = ""
@@ -2307,7 +2311,7 @@ def _audio_profile_model_sync(wav: Path, segments: list[TranscriptSegment], targ
fallback = _fallback_audio_profile(segments, target_seconds)
if not LLM_API_KEY or not wav.exists():
return fallback
transcript = _transcript_join(segments, "en") or _transcript_join(segments, "zh") or "No reliable transcript."
transcript = _ensure_english(_transcript_join(segments, "en") or _transcript_join(segments, "zh") or "No reliable transcript.")
try:
audio_b64 = base64.b64encode(wav.read_bytes()).decode("ascii")
except Exception:
@@ -2373,12 +2377,15 @@ def _build_audio_intake_sync(job_id: str, wav: Path, segments: list[TranscriptSe
)
def _rewrite_audio_script_sync(segments: list[TranscriptSegment], target_seconds: float = 12.0) -> tuple[str, str]:
def _rewrite_audio_script_sync(segments: list[TranscriptSegment], target_seconds: float = 12.0) -> tuple[str, str, str]:
fallback = _fallback_audio_script(segments, target_seconds)
try:
fallback_zh = _translate_text_sync(fallback, "zh", max_tokens=300) if LLM_API_KEY else ""
except Exception:
fallback_zh = ""
if not LLM_API_KEY:
return fallback, "LLM_API_KEY 未配置,使用本地 SKG 模板"
return fallback, fallback_zh, "LLM_API_KEY 未配置,使用本地 SKG 模板"
source_text = _transcript_join(segments, "en")
source_zh = _transcript_join(segments, "zh")
min_words, max_words = _voiceover_target_words(target_seconds)
prompt = (
"You are an English short-video voice-over writer for SKG wellness massagers. "
@@ -2392,10 +2399,9 @@ def _rewrite_audio_script_sync(segments: list[TranscriptSegment], target_seconds
"5. Introduce SKG products directly: smart massage, warmth, rhythm, daily neck/back/eye/knee/foot relaxation.\n"
"6. Keep it easy for TTS: short sentences, spoken phrasing, no hashtags, no stage directions, no quotation marks.\n"
"7. If the source transcript is thin, ignore it and write a general SKG product intro.\n"
'Return strict JSON only: {"rewritten_text":"..."}.\n\n'
f"SKG product context: {AUDIO_PRODUCT_BRIEF}\n\n"
f"English transcript:\n{source_text or 'None'}\n\n"
f"Chinese translation for reference:\n{source_zh or 'None'}"
'Return strict JSON only: {"rewritten_text":"English VO","rewritten_text_zh":"Simplified Chinese mirror for team review"}.\n\n'
f"SKG product context: {_ensure_english(AUDIO_PRODUCT_BRIEF)}\n\n"
f"English transcript:\n{source_text or 'None'}"
)
try:
resp = llm().chat.completions.create(
@@ -2415,9 +2421,12 @@ def _rewrite_audio_script_sync(segments: list[TranscriptSegment], target_seconds
raw = match.group(0) if match else raw
data = json.loads(raw)
text = str(data.get("rewritten_text", "")).strip()
return (text or fallback), ""
text_zh = str(data.get("rewritten_text_zh", "")).strip()
if text and not text_zh:
text_zh = _translate_text_sync(text, "zh", max_tokens=300)
return (text or fallback), (text_zh or fallback_zh), ""
except Exception as e:
return fallback, f"改写失败,使用本地模板:{e}"
return fallback, fallback_zh, f"改写失败,使用本地模板:{e}"
def _choose_azure_voice_id() -> str:
@@ -2521,7 +2530,7 @@ def _build_audio_script_sync(job_id: str, segments: list[TranscriptSegment], tar
source_text = _transcript_join(segments, "en")
source_zh = _transcript_join(segments, "zh")
duration = max(float(target_seconds or 0), _segment_duration(segments), 4.0)
rewritten, rewrite_error = _rewrite_audio_script_sync(segments, duration)
rewritten, rewritten_zh, rewrite_error = _rewrite_audio_script_sync(segments, duration)
selected_voice_id = _choose_tts_voice_id()
speaker_profile, rhythm_profile = _audio_delivery_profile(segments, duration, selected_voice_id)
voice_url = ""
@@ -2539,6 +2548,7 @@ def _build_audio_script_sync(job_id: str, segments: list[TranscriptSegment], tar
source_text=source_text,
source_zh=source_zh,
rewritten_text=rewritten,
rewritten_text_zh=rewritten_zh,
speaker_profile=speaker_profile,
rhythm_profile=rhythm_profile,
product_brief=AUDIO_PRODUCT_BRIEF,
@@ -3055,6 +3065,55 @@ class RewriteStoryboardScriptReq(BaseModel):
segments: list[ScriptRewriteSegmentReq] = Field(default_factory=list)
_TRANSLATION_CACHE: dict[str, str] = {}
def _contains_cjk(text: str) -> bool:
return bool(re.search(r"[\u3400-\u9fff]", text or ""))
def _translate_text_sync(text: str, target: Literal["en", "zh"] = "en", *, max_tokens: int = 700) -> str:
text = (text or "").strip()
if not text or not LLM_API_KEY:
return text
target_label = "English" if target == "en" else "Simplified Chinese"
prompt = (
f"Translate the following TikTok ad planning text into concise natural {target_label}. "
"Preserve concrete product, camera, subject, timing, and structure details. "
"Do not add commentary, markdown, quotes, or explanations.\n\n"
f"Input:\n{text}"
)
resp = llm().chat.completions.create(
model=TRANSLATE_MODEL,
messages=[{"role": "user", "content": prompt}],
temperature=0.15,
max_tokens=max_tokens,
)
out = (resp.choices[0].message.content or "").strip()
if not out:
rc = getattr(resp.choices[0].message, "reasoning_content", "") or ""
if rc:
out = rc.strip().splitlines()[-1].strip()
return re.sub(r'^[\'"「『]+|[\'"」』]+$', "", out).strip() or text
def _ensure_english(text: str) -> str:
text = (text or "").strip()
if not text or not _contains_cjk(text):
return text
key = hashlib.sha256(("en\0" + text).encode("utf-8")).hexdigest()
cached = _TRANSLATION_CACHE.get(key)
if cached:
return cached
try:
translated = _translate_text_sync(text, "en", max_tokens=max(700, min(3500, len(text) // 2 + 900)))
_TRANSLATION_CACHE[key] = translated
return translated
except Exception as e:
print(f"[ensure english fallback] {e}", flush=True)
return text
@app.post("/translate")
def translate_text(req: TranslateReq) -> dict:
"""单条文本翻译(给生图自定义提取元素 zh→en 用)"""
@@ -3092,22 +3151,26 @@ def translate_text(req: TranslateReq) -> dict:
def _fallback_script_rewrite_item(segment: ScriptRewriteSegmentReq, author_intent: str = "") -> dict:
source = (segment.source or "").strip()
intent = (author_intent or "").strip()
intent = _ensure_english(author_intent or "")
role = segment.role or ""
templates = {
"开场钩子": "你有没有发现,低头久了以后,脖子和肩膀会先替你喊累。",
"痛点推进": "刷手机、坐电脑、赶通勤叠在一起,肩颈很容易一直绷着放不下来。",
"利益证明": "SKG 这种挂脖按摩仪,重点就是贴住肩颈位置,把热敷感和揉按感带到真正紧的地方。",
"方案过渡": "这一段可以直接拍拿起、戴上、贴合,让产品自然进入日常放松场景。",
"转化收口": "如果你也想把肩颈放松变成每天的小习惯,可以从这台 SKG 开始。",
"节奏承接": "顺着原片节奏,把这一句落到一个具体的肩颈使用场景里。",
"hook": "Have you noticed that after hours of looking down, your neck and shoulders complain before you do?",
"pain": "Phone scrolling, desk work, and commuting can keep your neck and shoulders tight all day.",
"proof": "An SKG wearable massager sits around the neck and shoulders, bringing warm, rhythmic comfort to the spots that feel tense.",
"solution": "This beat can simply show pick up, wear, fit, and relax, so the product enters a normal daily routine.",
"cta": "If you want neck-and-shoulder relaxation to become a daily habit, start with this SKG massager.",
"bridge": "Follow the source rhythm, but land this line in one specific neck-and-shoulder use moment.",
}
rewritten = templates.get(role, templates["节奏承接"])
if source and role not in {"开场钩子", "转化收口"}:
rewritten = f"{rewritten} 原片这一句的节奏可以保留,但内容换成 SKG 的佩戴和放松体验。"
rewritten = templates.get(role, templates["bridge"])
if source and role not in {"hook", "cta"}:
rewritten = f"{rewritten} Keep the source sentence rhythm, but replace the content with SKG wearing and relaxation experience."
if intent:
rewritten = f"{rewritten} 语气按作者想法处理:{intent[:44]}"
return {"index": segment.index, "text": rewritten[:220]}
rewritten = f"{rewritten} Adjust the tone based on the creator note: {intent[:90]}."
try:
zh = _translate_text_sync(rewritten, "zh", max_tokens=260) if LLM_API_KEY else ""
except Exception:
zh = ""
return {"index": segment.index, "text": rewritten[:260], "text_zh": zh}
def _parse_script_rewrite_items(raw: str, requested: list[ScriptRewriteSegmentReq], author_intent: str = "") -> list[dict]:
@@ -3123,7 +3186,7 @@ def _parse_script_rewrite_items(raw: str, requested: list[ScriptRewriteSegmentRe
raw_items = data.get("items") if isinstance(data, dict) else data
if not isinstance(raw_items, list):
raw_items = []
by_index: dict[int, str] = {}
by_index: dict[int, tuple[str, str]] = {}
for item in raw_items:
if not isinstance(item, dict):
continue
@@ -3132,19 +3195,27 @@ def _parse_script_rewrite_items(raw: str, requested: list[ScriptRewriteSegmentRe
except Exception:
continue
value = str(item.get("text") or item.get("rewritten_text") or "").strip()
value_zh = str(item.get("text_zh") or item.get("rewritten_text_zh") or "").strip()
if value:
by_index[idx] = re.sub(r"\s+", " ", value).strip()[:260]
return [
{"index": segment.index, "text": by_index.get(segment.index) or _fallback_script_rewrite_item(segment, author_intent)["text"]}
for segment in requested
]
by_index[idx] = (re.sub(r"\s+", " ", value).strip()[:260], re.sub(r"\s+", " ", value_zh).strip()[:260])
items = []
for segment in requested:
fallback = _fallback_script_rewrite_item(segment, author_intent)
text, text_zh = by_index.get(segment.index, ("", ""))
if text and not text_zh:
try:
text_zh = _translate_text_sync(text, "zh", max_tokens=260) if LLM_API_KEY else ""
except Exception:
text_zh = ""
items.append({"index": segment.index, "text": text or fallback["text"], "text_zh": text_zh or fallback.get("text_zh", "")})
return items
def _rewrite_storyboard_script_sync(req: RewriteStoryboardScriptReq) -> list[dict]:
segments = [segment for segment in req.segments if (segment.source or segment.current_text).strip()]
if not segments:
return []
author_intent = (req.author_intent or "").strip()
author_intent = _ensure_english(req.author_intent or "")
if not LLM_API_KEY:
return [_fallback_script_rewrite_item(segment, author_intent) for segment in segments]
payload = [
@@ -3152,26 +3223,27 @@ def _rewrite_storyboard_script_sync(req: RewriteStoryboardScriptReq) -> list[dic
"index": segment.index,
"time": f"{segment.start:.1f}-{segment.end:.1f}s",
"role": segment.role,
"source_reference": segment.source,
"current_voiceover": segment.current_text,
"source_reference": _ensure_english(segment.source),
"current_voiceover": _ensure_english(segment.current_text),
}
for segment in segments
]
prompt = (
"你是信息流广告脚本文案改写师。任务:基于原参考文案的节奏和信息结构,把每段改写成 SKG 挂脖肩颈按摩仪的新口播文案。\n"
"硬规则:\n"
"1. 输出中文短视频口播,不要英文,不要舞台说明,不要引号。\n"
"2. 不逐字翻译原文,不保留原品牌、价格、优惠码、平台话术;只参考节奏、钩子、痛点、转化结构。\n"
"3. 产品固定为套在脖子上的 U 形肩颈按摩仪,表达肩颈紧绷、久坐低头、热敷感、揉按感、佩戴放松和日常使用场景。\n"
"4. 避免医疗疗效、治疗、治愈、止痛等强功效承诺。\n"
"5. 每段尽量短,适配该段时间;保持自然创作者口吻。\n"
"6. mode=all整片要前后连贯mode=segment 时,只改给定段落但仍要贴合上下文风格。\n"
f"作者想法:{author_intent or '没有额外想法,按原片节奏改成自然卖点口播。'}\n"
f"改写模式:{req.mode}\n"
f"SKG 产品背景:{AUDIO_PRODUCT_BRIEF}\n\n"
"输入段落 JSON\n"
"You are an information-feed ad voice-over rewrite specialist. Rewrite each segment into a new ENGLISH SKG neck-and-shoulder massager voice-over line while preserving the source rhythm and information structure.\n"
"Hard rules:\n"
"1. The main text field must be English short-video VO. No stage directions, no quotes.\n"
"2. Do not translate word-for-word. Do not keep the original brand, price, discount code, platform CTA, or exact claims; only reuse rhythm, hook, pain-point, proof, and conversion structure.\n"
"3. The product is a U-shaped neck-and-shoulder wearable massager worn around the neck. Express neck/shoulder tension, desk posture, looking down, warmth, kneading-like comfort, wearing, relaxation, and daily use.\n"
"4. Avoid medical treatment, cure, pain elimination, clinical, or disease claims.\n"
"5. Keep each segment short enough for its time range and natural for a creator voice.\n"
"6. If mode=all, make the whole piece coherent; if mode=segment, rewrite only the given segment while matching the broader style.\n"
"7. Also return a Simplified Chinese mirror for team review in text_zh; it is not for model prompts.\n"
f"Creator note: {author_intent or 'No extra note; follow the source pacing and turn it into natural SKG product VO.'}\n"
f"Rewrite mode: {req.mode}\n"
f"SKG product context: {_ensure_english(AUDIO_PRODUCT_BRIEF)}\n\n"
"Input segments JSON:\n"
+ json.dumps(payload, ensure_ascii=False)
+ '\n\n只输出严格 JSON{"items":[{"index":0,"text":"改写后的中文口播"}]}'
+ '\n\nReturn strict JSON only: {"items":[{"index":0,"text":"rewritten English VO","text_zh":"中文镜像"}]}'
)
models = []
for model in [AUDIO_REWRITE_MODEL, ASR_FALLBACK_MODEL, TRANSLATE_MODEL]:
@@ -3182,7 +3254,7 @@ def _rewrite_storyboard_script_sync(req: RewriteStoryboardScriptReq) -> list[dic
resp = llm().chat.completions.create(
model=model,
messages=[
{"role": "system", "content": "只返回合法 JSON不要 markdown不要解释。"},
{"role": "system", "content": "Return valid JSON only. No markdown. No explanation."},
{"role": "user", "content": prompt},
],
response_format={"type": "json_object"},
@@ -3950,6 +4022,7 @@ class UpdateElementReq(BaseModel):
name_en: str | None = None
position: str | None = None
subject_consensus_brief: str | None = None
subject_consensus_brief_zh: str | None = None
class GenerateSceneAssetReq(BaseModel):
@@ -3998,8 +4071,8 @@ class GenerateSubjectAssetsReq(BaseModel):
def _subject_profile_prompt_clause(profile: SubjectProfilePreference | None) -> str:
if not profile:
return ""
prompt_summary = (profile.prompt_summary or "").strip()
resolved_summary = (profile.resolved_summary or "").strip()
prompt_summary = _ensure_english(profile.prompt_summary or "")
resolved_summary = _ensure_english(profile.resolved_summary or "")
if prompt_summary:
body = prompt_summary[:1400]
else:
@@ -4013,7 +4086,7 @@ def _subject_profile_prompt_clause(profile: SubjectProfilePreference | None) ->
("hair style", profile.hair),
("commercial mood", profile.mood),
]
body = "; ".join(f"{name}: {value.strip()}" for name, value in parts if value and value.strip())[:1400]
body = "; ".join(f"{name}: {_ensure_english(value.strip())}" for name, value in parts if value and value.strip())[:1400]
if not body and not resolved_summary:
return ""
mode = "random-composed" if profile.mode == "random" else "manually selected"
@@ -4125,7 +4198,9 @@ def update_element(job_id: str, idx: int, element_id: str, req: UpdateElementReq
if req.position is not None:
e.position = req.position.strip()
if req.subject_consensus_brief is not None:
e.subject_consensus_brief = req.subject_consensus_brief.strip()[:2200]
e.subject_consensus_brief = _ensure_english(req.subject_consensus_brief.strip())[:2200]
if req.subject_consensus_brief_zh is not None:
e.subject_consensus_brief_zh = req.subject_consensus_brief_zh.strip()[:2200]
new_frames.append(f)
if not found:
raise HTTPException(404, "element not found")
@@ -4208,7 +4283,7 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
if confirmed_subjects
else "Remove the main foreground subject from the frame if present. "
)
subject_brief = req.subject_brief.strip()
subject_brief = _ensure_english(req.subject_brief.strip())
subject_brief_clause = (
f"Subject identity (text only, no image reference): {subject_brief[:1800]}. "
"Maintain this identity across this and other endpoint frames in the same storyboard. "
@@ -4237,7 +4312,7 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
"warm_lifestyle": "Use a warm lifestyle style: realistic lived-in details, soft natural light, approachable atmosphere.",
"cinematic": "Use a cinematic style: dramatic but natural lighting, richer depth, filmic contrast, not fantasy.",
}[req.scene_style]
user_prompt = req.prompt.strip()
user_prompt = _ensure_english(req.prompt.strip())
user_prompt_clause = (
"User scene direction: " + user_prompt[:1200] + " "
if user_prompt
@@ -4483,6 +4558,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
brief = template.prompt_brief.strip() or template.note.strip() or template.description.strip()
if similar_mode and not brief:
brief = _describe_subject_template_from_images(template.name, template.subject_style, template_paths, template.note)
brief = _ensure_english(brief)
selected_template_brief = brief.strip()
template_brief_clause = (
f"Reference character brief from saved database template '{template.name}': {brief}. "
@@ -4496,6 +4572,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
character_label = character.name
character_reference_paths.extend(character_library_file(image.filename) for image in character.images[:7])
brief = character.prompt_brief.strip() or character.description.strip()
brief = _ensure_english(brief)
selected_template_brief = brief.strip()
template_brief_clause = (
f"Reference character brief from built-in creative character '{character.name}': {brief}. "
@@ -4558,7 +4635,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
if req.reconstruction_mode == "similar"
else "Preserve identity, proportions, silhouette, material, colors, styling, and distinctive details across all generated views. "
)
prompt_extra = req.prompt.strip()
prompt_extra = _ensure_english(req.prompt.strip())
prompt_extra_clause = f"User direction: {prompt_extra[:1200]} " if prompt_extra else ""
subject_profile_clause = _subject_profile_prompt_clause(req.subject_profile)
identity_lock_clause = (
@@ -4709,7 +4786,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
]
fallback_brief = " ".join(part.strip() for part in fallback_parts if part and part.strip())[:1800]
if selected_template_brief:
e.subject_consensus_brief = selected_template_brief[:1800]
e.subject_consensus_brief = _ensure_english(selected_template_brief)[:1800]
else:
asset_paths = [
job_dir(job_id) / "assets" / f"{asset.id}.jpg"
@@ -4722,9 +4799,14 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
asset_paths,
fallback_brief,
)
e.subject_consensus_brief = brief or current_brief or fallback_brief or (
e.subject_consensus_brief = _ensure_english(brief or current_brief or fallback_brief or (
"Generated SKG ad subject; identity brief unavailable. Keep one consistent commercial subject with clear neck and shoulder placement area."
)
))[:1800]
if e.subject_consensus_brief and not e.subject_consensus_brief_zh:
try:
e.subject_consensus_brief_zh = _translate_text_sync(e.subject_consensus_brief, "zh", max_tokens=500)[:1800]
except Exception:
e.subject_consensus_brief_zh = ""
new_frames.append(f)
if generation_errors:
msg = f"主体资产包部分生成完成 · {el.name_zh} · {len(generated)} 张,失败 {len(generation_errors)}"
@@ -5296,18 +5378,23 @@ def save_subject_template(job_id: str, req: SaveSubjectTemplateReq) -> SubjectTe
raise HTTPException(404, "subject asset files missing")
primary = next((image.id for image in images if image.view == "front"), images[0].id)
prompt_brief = _describe_subject_template_from_images(
prompt_brief = _ensure_english(_describe_subject_template_from_images(
name,
req.subject_style,
saved_image_paths,
req.note.strip(),
) or req.note.strip()
) or req.note.strip())
try:
prompt_brief_zh = _translate_text_sync(prompt_brief, "zh", max_tokens=500) if prompt_brief else ""
except Exception:
prompt_brief_zh = ""
item = SubjectTemplateItem(
id=template_id,
name=name,
description=req.note.strip(),
note=req.note.strip(),
prompt_brief=prompt_brief,
prompt_brief_zh=prompt_brief_zh,
source_job_id=job_id,
source_frame_idx=frame.index,
source_element_id=element.id,

View File

@@ -663,7 +663,7 @@ api/main.py
</div>
<div class="flow-row">
<div><strong>你看到的区域</strong><span>信息流复刻分镜工作台</span></div>
<div><strong>主要源码</strong><span><code>AudioStoryboardPlanPanel</code><code>ProductReferenceCard</code><code>MissingProductViewSlot</code><code>buildAudioStoryboardRows</code><code>selectProductItemsForRow</code><code>subjectAssetRefsForPlanning</code><code>subjectBriefForEndpoint</code><code>endpointAssetRef</code><code>buildEndpointFramePrompt</code><code>buildStoryboardSceneFromAudioRow</code><code>generateEndpointFrameForRow</code><code>saveRowStoryboardDraft</code><code>saveAllStoryboardDrafts</code><code>EndpointFrameSlot</code><code>StoryboardVideoSlots</code> in <code>web/components/ad-recreation-board.tsx</code>;产品图、首尾帧和视频候选缩略图统一复用 <code>MediaAssetTile</code>,包括顶层 hover 放大和删除入口。产品白底图上传复用 <code>uploadStoryboardAsset</code>,视角自动识别调用 <code>analyzeProductViews</code>,缺角度自动补图调用 <code>generateProductAngleAsset</code>。当前单条/批量按钮只保存规划;首尾帧按钮调用 <code>generateSceneAsset</code>,传 <code>subject_brief</code> 和端点选择后的 1-2 张 <code>product_images</code>,不再传主体图或 contact sheet再用 <code>PUT /frames/{idx}/storyboard</code> 保存 asset 首尾帧引用;首尾帧删除只移除本条规划中的引用,避免继续误用旧资产。<code>web/app/page.tsx</code> 的视频提交回调有暂停保护,旧入口误触也不会请求 <code>/storyboard/video</code></span></div>
<div><strong>主要源码</strong><span><code>AudioStoryboardPlanPanel</code><code>ProductReferenceCard</code><code>MissingProductViewSlot</code><code>buildAudioStoryboardRows</code><code>selectProductItemsForRow</code><code>subjectAssetRefsForPlanning</code><code>subjectBriefForEndpoint</code><code>endpointAssetRef</code><code>buildEndpointFramePrompt</code><code>buildStoryboardSceneFromAudioRow</code><code>generateEndpointFrameForRow</code><code>saveRowStoryboardDraft</code><code>saveAllStoryboardDrafts</code><code>EndpointFrameSlot</code><code>StoryboardVideoSlots</code> in <code>web/components/ad-recreation-board.tsx</code>;产品图、首尾帧和视频候选缩略图统一复用 <code>MediaAssetTile</code>,包括顶层 hover 放大和删除入口。产品白底图上传复用 <code>uploadStoryboardAsset</code>,视角自动识别调用 <code>analyzeProductViews</code>,缺角度自动补图调用 <code>generateProductAngleAsset</code>。当前单条/批量按钮只保存规划;首尾帧按钮调用 <code>generateSceneAsset</code>,传 <code>subject_brief</code> 和端点选择后的 1-2 张 <code>product_images</code>,不再传主体图或 contact sheet再用 <code>PUT /frames/{idx}/storyboard</code> 保存 asset 首尾帧引用;首尾帧删除只移除本条规划中的引用,避免继续误用旧资产。语言策略由 <code>AudioStoryboardRow</code> 的英文主字段 + <code>*Zh</code> 镜像字段承载:<code>role</code> 内部是 <code>hook/pain/proof/solution/cta/bridge</code><code>buildEndpointFramePrompt</code><code>StoryboardScene</code> 主值默认英文,中文只用于团队阅读;首尾帧提交前前端 <code>translateText</code> 兜底,后端 <code>_ensure_english</code> 再兜底。<code>web/app/page.tsx</code> 的视频提交回调有暂停保护,旧入口误触也不会请求 <code>/storyboard/video</code></span></div>
<div><strong>适合怎么描述</strong><span>“按音频逐句生成产品分镜、每行怎样改写口播、哪几句不需要产品或人物、首帧/尾帧该怎么停、首尾帧是否已经生成并准确、产品素材池识别/补图后的备注是否准确、哪些分镜后续才值得进入单条视频候选”。</span></div>
</div>
<div class="flow-row">
@@ -737,17 +737,19 @@ api/main.py
cutout_id,
subject_kind: object | living,
subject_assets: SubjectAsset[],
subject_consensus_brief
subject_consensus_brief,
subject_consensus_brief_zh
}</pre>
</div>
<div class="card">
<h3>AudioScript</h3>
<p>第一步音频解析的结构化产物。<code>pipeline_transcribe</code> 提取 <code>audio.wav</code> 后先保存原始转写、中文翻译、讲话人画像、口播节奏和背景音乐/环境声/音效分析。<code>rewritten_text</code><code>voice_url</code> 等字段仍保留给后续新配音阶段,当前第一步不默认写入</p>
<p>第一步音频解析的结构化产物。<code>pipeline_transcribe</code> 提取 <code>audio.wav</code> 后先保存原始英文转写、中文翻译、讲话人画像、口播节奏和背景音乐/环境声/音效分析。<code>rewritten_text</code> 是英文新口播,<code>rewritten_text_zh</code> 只作为团队审稿镜像;<code>voice_url</code> 等字段仍保留给后续新配音阶段。</p>
<pre>AudioScript {
status: idle | rewriting | completed | failed,
source_text,
source_zh,
rewritten_text,
rewritten_text_zh,
speaker_profile,
rhythm_profile,
background_audio_profile,
@@ -777,10 +779,11 @@ SubjectAsset {
width, height, size,
source_frame_indices[]
}</pre>
<p><code>SubjectTemplateItem</code> 保存用户确认过的主体视图包。<code>prompt_brief</code> 是后端从模板图反推的文字特征,后续相似生成优先读取它,而不是再次把模板图作为强参考图传给 image-edit。</p>
<p><code>SubjectTemplateItem</code> 保存用户确认过的主体视图包。<code>prompt_brief</code> 是后端从模板图反推的英文文字特征,后续相似生成优先读取它,而不是再次把模板图作为强参考图传给 image-edit。<code>prompt_brief_zh</code> 仅用于模板库卡片和团队阅读。</p>
<pre>SubjectTemplateItem {
id, name, description, note,
prompt_brief,
prompt_brief_zh,
subject_style: transparent_human | source_actor,
primary_image,
images: SubjectTemplateImage[]
@@ -907,7 +910,7 @@ ProductRefStateItem {
<tr><td>删除输入视频</td><td><code>DELETE /jobs/{id}</code></td><td><code>deleteJob</code></td><td>从任务队列、URL 和磁盘 <code>jobs/&lt;id&gt;</code> 目录移除整个 job包括源视频、关键帧、元素提取图和生成视频。</td></tr>
<tr><td>解析视频</td><td><code>POST /jobs/{id}/analyze?frames=&amp;target=&amp;mode=&amp;quality=</code></td><td><code>analyzeJob</code></td><td>抽参考帧能力。当前开始流程会在视频下载完成后自动调用一次,默认 <code>frames=12</code><code>target=motion</code><code>quality=accurate</code><code>mode=replace</code>,形成全局动作/节奏参考帧池;原版视频旁的“抽参考 12 帧”也会用同一参数显式重跑。<code>target</code> 仍支持透明骨架人、综合、清晰主体、转场变化、表情瞬间、动作峰值。</td></tr>
<tr><td>音频文案轨</td><td><code>POST /jobs/{id}/transcribe</code></td><td><code>triggerTranscribe</code></td><td>若尚未拆轨,先从 <code>source.mp4</code> 提取 <code>audio.wav</code> 并回填 <code>source_audio_url</code>;随后用 ASR 提取原始文案,翻译成中文,写入 <code>audio_script.source_text</code><code>source_zh</code> 和逐句 <code>transcript</code>。远端 <code>ASR_MODEL</code> 失败后先走本机 <code>LOCAL_ASR_BIN</code>/<code>LOCAL_ASR_MODEL</code>(默认 <code>mlx_whisper</code>),再尝试 <code>ASR_FALLBACK_MODEL</code>。后端会拒绝重复文本、逐秒假字幕或覆盖率过低的结果,不再把不可听的多模态输出写进时间轴。中文翻译由 <code>TRANSLATE_MODEL</code> 按 ASR 段落补齐,失败时保留原文时间轴且中文可为空。再用 <code>ASR_FALLBACK_MODEL</code> 读取 <code>audio.wav</code> 和已有转写时间轴,多模态音频分析讲话人、语速节奏、停顿、背景音乐/环境声/音效,写入 <code>speaker_profile</code><code>rhythm_profile</code><code>background_audio_profile</code>;若模型分析失败,则用转写段落、时长和语速做本地估算兜底。当前第一步不默认生成 SKG 新口播和 Azure OpenAI 配音。</td></tr>
<tr><td>分镜脚本改写</td><td><code>POST /jobs/{id}/script/rewrite</code></td><td><code>rewriteStoryboardScript</code></td><td>根据原参考文案、当前新口播、分镜角色、时间段和作者想法改写文口播。<code>mode=segment</code> 只改一段;<code>mode=all</code> 一次改完整片,要求整片前后连贯。后端按 <code>AUDIO_REWRITE_MODEL</code><code>ASR_FALLBACK_MODEL</code><code>TRANSLATE_MODEL</code> 依次尝试,全部失败时用本地模板保留可编辑文案。接口返回 <code>items[index,text]</code>前端暂存在当前页面状态里,点击保存规划后写入 <code>StoryboardScene.action</code></td></tr>
<tr><td>分镜脚本改写</td><td><code>POST /jobs/{id}/script/rewrite</code></td><td><code>rewriteStoryboardScript</code></td><td>根据原英文参考文案、当前英文新口播、英文 role enum、时间段和作者想法改写英文口播;作者想法若含中文,后端会先经 <code>_ensure_english</code> 兜底翻译。<code>mode=segment</code> 只改一段;<code>mode=all</code> 一次改完整片,要求整片前后连贯。后端按 <code>AUDIO_REWRITE_MODEL</code>、<code>ASR_FALLBACK_MODEL</code>、<code>TRANSLATE_MODEL</code> 依次尝试,全部失败时用英文本地模板保留可编辑文案。接口返回 <code>items[index,text,text_zh]</code>,其中 <code>text</code> 是写入模型链路的英文主值,<code>text_zh</code> 只供团队审稿镜像显示;点击保存规划后写入 <code>StoryboardScene.action</code>。</td></tr>
<tr><td>原始音频文件</td><td><code>GET /jobs/{id}/audio.wav</code></td><td><code>sourceAudioUrl</code></td><td>返回拆轨得到的 wav当前主界面不再渲染底部吸附音频条右侧复刻工作表会读取该文件生成参考图式横向响度波形并和原视频、逐句时间轴联动波形标题栏显示当前播放秒数、总时长和鼠标指针停点秒数。</td></tr>
<tr><td>改写配音文件</td><td><code>GET /jobs/{id}/audio-script.mp3</code></td><td><code>apiAssetUrl(job.audio_script.voice_url)</code></td><td>后续新配音阶段保留的 TTS 产物;服务端固定走 <code>VOICE_PROVIDER=azure_openai</code>,通过 <code>AZURE_OPENAI_BASE_URL</code> 的 OpenAI 协议生成 mp3并按 <code>AZURE_TTS_PATHS</code> 依次尝试 <code>/audio/speech</code><code>/v1/audio/speech</code> 等路径。当前第一步不默认生成该文件。</td></tr>
<tr><td>手动加帧</td><td><code>POST /jobs/{id}/frames?t=</code></td><td><code>addManualFrame</code></td><td>按视频时间戳抽一帧index 递增但 frames 按 timestamp 排序。当前主界面会把原版视频播放器的播放秒数传给 <code>AudioIntakePanel</code> 标题栏右侧的“当前点抽帧”。</td></tr>
@@ -1035,6 +1038,19 @@ ProductRefStateItem {
<h2>变更记录</h2>
<p>这个记录不是 git log 的替代品。它记录“产品理解发生了什么变化、影响了哪些源码、你以后描述需求时该怎么说”。后续每次改功能都要补一条。</p>
<div class="changelog">
<article class="change">
<header>
<h3>2026-05-18 · 模型 prompt 语言策略切到英文主值</h3>
<span class="tag cyan">Prompt</span>
<span class="tag violet">UI</span>
<span class="tag blue">API</span>
</header>
<div class="body">
<p><strong>问题:</strong>最终产物是英文 TikTok 二创广告,但前端默认分镜规划、首尾帧 prompt 和用户可编辑规划字段里混入中文,导致 <code>gpt-image-2</code>、脚本改写和后续视频模型收到中英混杂指令。</p>
<p><strong>改动:</strong><code>AudioStoryboardRow.role</code> 改为 <code>hook/pain/proof/solution/cta/bridge</code> 英文枚举UI 仍显示中文角色标签。<code>buildAudioStoryboardRows</code><code>buildVisualPlan</code><code>buildFirstFramePlan</code><code>buildLastFramePlan</code><code>buildSubjectDescription</code><code>buildEndpointFramePrompt</code><code>buildStoryboardSceneFromAudioRow</code> 的模型主字段改为英文,并新增 <code>*Zh</code> 镜像用于团队阅读。首尾帧提交前前端会对含中文 prompt 调 <code>translateText</code> 兜底,后端新增 <code>_ensure_english</code> 并挂到 <code>generate_scene_asset</code><code>generate_subject_assets</code>、脚本改写和音频分析入口。</p>
<p><strong>影响:</strong>发给 LLM / 生图 / 视频模型的主 prompt 默认全英文中文只作为团队审稿镜像、UI 标签和 toast。<code>AudioScript</code> 新增 <code>rewritten_text_zh</code><code>KeyElement</code> 新增 <code>subject_consensus_brief_zh</code><code>SubjectTemplateItem</code> 新增 <code>prompt_brief_zh</code><code>POST /jobs/{id}/script/rewrite</code> 返回 <code>text</code> 英文主值和 <code>text_zh</code> 中文镜像。</p>
</div>
</article>
<article class="change">
<header>
<h3>2026-05-18 · 首尾帧改为主体 brief + 产品少量硬参考</h3>

View File

@@ -54,6 +54,7 @@ import {
updateElement,
updateStoryboard,
uploadStoryboardAsset,
translateText,
videoUrl,
} from "@/lib/api"
import { type NodeData } from "@/components/nodes"
@@ -83,6 +84,7 @@ const VIDEO_MODELS = [
type VideoModel = (typeof VIDEO_MODELS)[number]["value"]
type BoardThemeMode = "dark" | "light"
type AudioStoryboardRole = "hook" | "pain" | "proof" | "solution" | "cta" | "bridge"
const BOARD_THEME_STORAGE_KEY = "skg-board-theme"
@@ -103,19 +105,28 @@ type AudioStoryboardRow = {
start: number
end: number
source: string
role: string
sourceZh: string
role: AudioStoryboardRole
visualMode: StoryboardVisualMode
needsProduct: boolean
needsSubject: boolean
subjectDescription: string
subjectDescriptionZh: string
skgCopy: string
skgCopyZh: string
visualPlan: string
visualPlanZh: string
firstFramePlan: string
firstFramePlanZh: string
lastFramePlan: string
lastFramePlanZh: string
referencePlan: string
keyElements: string
keyElementsZh: string
productIntegration: string
productIntegrationZh: string
productPlacement: string
productPlacementZh: string
}
type ProductRefItem = ProductRefStateItem
@@ -136,7 +147,7 @@ type ResolvedSubjectProfile = {
payload: SubjectProfilePreference
}
type StoryboardVisualMode = NonNullable<StoryboardScene["visual_mode"]>
type RowPlanPatch = Partial<Pick<AudioStoryboardRow, "visualMode" | "needsProduct" | "needsSubject" | "subjectDescription" | "visualPlan" | "firstFramePlan" | "lastFramePlan" | "productIntegration" | "productPlacement">>
type RowPlanPatch = Partial<Pick<AudioStoryboardRow, "visualMode" | "needsProduct" | "needsSubject" | "subjectDescription" | "subjectDescriptionZh" | "visualPlan" | "visualPlanZh" | "firstFramePlan" | "firstFramePlanZh" | "lastFramePlan" | "lastFramePlanZh" | "productIntegration" | "productIntegrationZh" | "productPlacement" | "productPlacementZh">>
type WorkflowStepId = "input" | "source" | "audio" | "visual" | "subject" | "product" | "script" | "scene" | "video"
/** Status value a workflow step can carry. */
type WorkflowStepStatus = "blocked" | "pending" | "running" | "ready" | "paused"
type WorkflowStep = {
@@ -324,6 +335,52 @@ const PRODUCT_USE_TAG_LABELS: Record<string, string> = {
material_texture: "材质",
}
/** Chinese label for every `AudioStoryboardRole`; the `Record` type forces one entry per role. */
const ROLE_LABELS_ZH: Record<AudioStoryboardRole, string> = {
hook: "开场钩子",
pain: "痛点推进",
proof: "利益证明",
solution: "方案过渡",
cta: "转化收口",
bridge: "节奏承接",
}
/** English label for every `AudioStoryboardRole`; the `Record` type forces one entry per role. */
const ROLE_LABELS_EN: Record<AudioStoryboardRole, string> = {
hook: "hook",
pain: "pain build",
proof: "benefit proof",
solution: "solution transition",
cta: "conversion close",
bridge: "rhythm bridge",
}
/** English prompt phrase for each product reference view id (keys are the raw view ids). */
const PRODUCT_VIEW_PROMPT_LABELS: Record<string, string> = {
front: "front / outer shell",
left_45: "wearer's left 45-degree view",
right_45: "wearer's right 45-degree view",
side_thickness: "side thickness view",
inner_contacts: "inner neck-contact pads",
back_bottom: "back / bottom structure",
}
/** English prompt phrase for each product reference background id (keys are the raw background ids). */
const PRODUCT_BACKGROUND_PROMPT_LABELS: Record<string, string> = {
white: "white background",
black: "black background",
simple: "simple solid background",
complex: "complex background",
unknown: "unknown background",
}
/** English prompt phrase for each product reference use tag (keys are the raw tag ids). */
const PRODUCT_USE_TAG_PROMPT_LABELS: Record<string, string> = {
hero_packshot: "hero packshot",
wearing_scale: "wearing scale",
inner_contact: "inner contact pads",
side_thickness: "side thickness",
asymmetry: "left-right asymmetry",
button_detail: "button detail",
back_bottom: "back/bottom structure",
material_texture: "material texture",
}
const controlClass =
"h-10 rounded-md border border-white/10 bg-black/55 px-3 text-[12px] text-white outline-none transition focus:border-cyan-300/60 disabled:cursor-not-allowed disabled:opacity-40"
@@ -352,6 +409,20 @@ function shortId(id?: string | null) {
return id ? id.slice(0, 8) : "-"
}
/**
 * Reports whether `text` contains at least one Han (Chinese-script) character.
 *
 * The character class keeps the original BMP range U+3400–U+9FFF and adds every
 * other code point of the Unicode Han script (compatibility ideographs, radicals,
 * supplementary-plane extensions). A bare `\u` range cannot see those, so text
 * written only with such characters used to be reported as non-CJK.
 *
 * @param text - Text to inspect; an empty string yields `false`.
 * @returns `true` when any matching character is present.
 */
function containsCjk(text: string) {
  // The `u` flag is required for `\p{…}` and makes astral characters match as single code points.
  return /[\u3400-\u9fff\p{Script=Han}]/u.test(text)
}
/**
 * Returns an English version of `text` for use in model prompts.
 *
 * The input is trimmed first. Empty text, or text in which `containsCjk` finds
 * nothing, is returned as-is (trimmed). Otherwise the trimmed text is sent to
 * `translateText` with target `"en"`. If that call rejects, the trimmed original
 * is returned instead, so callers never see the error.
 */
async function ensureEnglishForModel(text: string) {
  const candidate = text.trim()
  if (candidate.length === 0) return candidate
  if (!containsCjk(candidate)) return candidate
  try {
    // Await inside the try so a rejected translation is caught below.
    const translated = await translateText(candidate, "en")
    return translated
  } catch {
    // Best effort: fall back to the untranslated text.
    return candidate
  }
}
/**
 * Looks up the option of `category` whose `value` equals `value`.
 * Falls back to the category's first option when nothing matches.
 */
function subjectProfileOption(category: SubjectProfileCategory, value: string) {
  const { options } = category
  const matched = options.find((candidate) => candidate.value === value)
  return matched ?? options[0]
}
@@ -373,6 +444,16 @@ function resolveSubjectProfile(
const values = { ...DEFAULT_SUBJECT_PROFILE_DRAFT }
const labelParts: string[] = []
const promptParts: string[] = []
const promptLabelByKey: Record<SubjectProfileFieldKey, string> = {
gender: "gender presentation",
age: "age range",
wardrobe: "wardrobe style",
region_ethnicity: "regional or ethnic appearance cues",
skin_tone: "skin tone",
body: "body proportion",
hair: "hair style",
mood: "commercial mood",
}
for (const category of SUBJECT_PROFILE_CATEGORIES) {
const rawValue = draft[category.key] || "random"
let option = subjectProfileOption(category, rawValue)
@@ -382,7 +463,7 @@ function resolveSubjectProfile(
}
values[category.key] = option.value
labelParts.push(`${category.label}${option.label}`)
promptParts.push(`${category.label}: ${option.prompt}`)
promptParts.push(`${promptLabelByKey[category.key]}: ${option.prompt}`)
}
const summary = labelParts.join(" / ")
const promptSummary = promptParts.join("; ")
@@ -393,14 +474,14 @@ function resolveSubjectProfile(
promptSummary,
payload: {
mode,
gender: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[0], values.gender).label,
age: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[1], values.age).label,
wardrobe: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[2], values.wardrobe).label,
region_ethnicity: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[3], values.region_ethnicity).label,
skin_tone: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[4], values.skin_tone).label,
body: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[5], values.body).label,
hair: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[6], values.hair).label,
mood: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[7], values.mood).label,
gender: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[0], values.gender).prompt,
age: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[1], values.age).prompt,
wardrobe: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[2], values.wardrobe).prompt,
region_ethnicity: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[3], values.region_ethnicity).prompt,
skin_tone: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[4], values.skin_tone).prompt,
body: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[5], values.body).prompt,
hair: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[6], values.hair).prompt,
mood: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[7], values.mood).prompt,
resolved_summary: summary,
prompt_summary: promptSummary,
},
@@ -828,161 +909,254 @@ function buildFallbackScene(job: Job, frame: KeyFrame, order: number): Storyboar
const duration = Math.max(3.5, Math.min(7.5, Math.max(job.duration || 0, frames.length * 5) / Math.max(frames.length, 1)))
const audio = job.audio_script?.rewritten_text?.trim()
|| job.transcript?.slice(0, 4).map((item) => item.en || item.zh).filter(Boolean).join(" ")
|| "按原音频说话节奏改写为 SKG 产品介绍。"
|| "Rewrite the original audio pacing into a new SKG product introduction."
const objects = frame.description?.objects?.slice(0, 5).map((item) => item.name).filter(Boolean).join("、")
return {
duration: Number(duration.toFixed(1)),
first_image: null,
last_image: null,
subject: objects ? `关键元素候选:${objects}` : "保留原视频最重要的主体动作和构图关系。",
scene: `${frame.description?.scene || `按第 ${order + 1} 段音频规划 SKG 信息流广告分镜。`}\n音频节奏依据${audio.slice(0, 220)}`,
product: "把原素材里的产品/痛点转成 SKG 颈部/肩颈按摩仪表达,默认使用 SKG 四张产品角度图做产品真源。",
subject: objects ? `Key element candidates: ${objects}` : "Keep the source video's most important subject motion and composition relationship.",
scene: `${frame.description?.scene || `Plan SKG information-feed ad scene ${order + 1} from the audio segment.`}\nAudio pacing reference: ${audio.slice(0, 220)}`,
product: "Convert the source product or pain-point context into SKG neck-and-shoulder massager expression. Use the uploaded SKG product angles as product truth.",
action: frame.description?.style
? `沿用原画面的讲话节奏、动作节点和 ${frame.description.style},突出使用前紧绷、使用后放松。`
: "沿用原视频的讲话节奏和动作节点,突出使用前紧绷、使用后放松。",
? `Keep the source speaking rhythm, action beats, and ${frame.description.style}; show tension before use and relaxed comfort after use.`
: "Keep the source speaking rhythm and action beats; show tension before use and relaxed comfort after use.",
reference_ids: [],
}
}
function classifyAudioRole(text: string, index: number, total: number) {
function classifyAudioRole(text: string, index: number, total: number): AudioStoryboardRole {
const lower = text.toLowerCase()
if (index === 0) return "开场钩子"
if (index >= total - 2 || /discount|code|shipping|link|limited|sold out|grab|recommend|tiktok/.test(lower)) return "转化收口"
if (/can't|dont|don't|if |when |tired|stress|pain|crave|bloated|puffy|ready/.test(lower)) return "痛点推进"
if (/help|can |reduce|improve|relax|lower|stabilize|clear|less/.test(lower)) return "利益证明"
if (/use|try|apple|product|bottle|one month/.test(lower)) return "方案过渡"
return "节奏承接"
if (index === 0) return "hook"
if (index >= total - 2 || /discount|code|shipping|link|limited|sold out|grab|recommend|tiktok/.test(lower)) return "cta"
if (/can't|dont|don't|if |when |tired|stress|pain|crave|bloated|puffy|ready/.test(lower)) return "pain"
if (/help|can |reduce|improve|relax|lower|stabilize|clear|less/.test(lower)) return "proof"
if (/use|try|apple|product|bottle|one month/.test(lower)) return "solution"
return "bridge"
}
function buildSkgCopy(role: string, index: number) {
const variants: Record<string, string[]> = {
"开场钩子": [
/**
 * Picks an English SKG voice-over line for a storyboard beat.
 *
 * Each role has a short list of candidate lines; `index` rotates through that
 * list with a modulo, so consecutive beats of the same role alternate wording.
 *
 * @param role - Narrative role of the beat.
 * @param index - Position used to rotate through the role's candidate lines.
 * @returns The selected line.
 */
function buildSkgCopy(role: AudioStoryboardRole, index: number) {
  const hookLines = [
    "If you spend hours looking down at your phone or working at a desk, your neck and shoulders may already be carrying that tension.",
    "A few hours on screens can make your neck and shoulders feel tired faster than you expect.",
  ]
  const painLines = [
    "That tight neck, heavy shoulder feeling, and uncomfortable head lift are signs you should not wait to deal with it.",
    "Commuting, desk work, parenting, and phone scrolling can keep your neck and shoulders tense all day.",
  ]
  const proofLines = [
    "The SKG neck-and-shoulder massager sits around the back of your neck and shoulders, bringing warmth and kneading-like comfort right where you feel tight.",
    "Wear it hands-free between work, at home, or before bed to settle into a calmer relaxation rhythm.",
  ]
  const solutionLines = [
    "This beat turns the source explanation into a clear SKG routine: pick it up, wear it, adjust the fit, and relax.",
    "Let the product enter naturally, and show the change from neck tension to a more relaxed state.",
  ]
  const ctaLines = [
    "If you want neck-and-shoulder relaxation to become part of your daily routine, this SKG massager is an easy place to start.",
    "Close with a clear product detail and a relaxed expression so viewers know exactly what to try next.",
  ]
  const bridgeLines = [
    "Keep the source video's short, fast rhythm, but anchor each line in a specific neck-and-shoulder moment or product action.",
    "Use this line as a bridge from the pain point into the SKG routine without slowing the pace.",
  ]
  const linesByRole: Record<AudioStoryboardRole, string[]> = {
    hook: hookLines,
    pain: painLines,
    proof: proofLines,
    solution: solutionLines,
    cta: ctaLines,
    bridge: bridgeLines,
  }
  // Runtime guard: a role value outside the union falls back to the bridge lines.
  const candidates = linesByRole[role] ?? linesByRole.bridge
  const slot = index % candidates.length
  return candidates[slot]
}
function buildSkgCopyZh(role: AudioStoryboardRole, index: number) {
const variants: Record<AudioStoryboardRole, string[]> = {
hook: [
"如果你也经常低头刷手机、久坐办公,肩颈紧绷可能已经在悄悄影响状态。",
"每天盯屏几个小时,脖子和肩膀的疲惫会比你想得更早出现。",
],
"痛点推进": [
pain: [
"脖子发紧、肩膀沉、抬头不舒服,不一定要等到很难受才处理。",
"通勤、办公、带娃、刷手机叠在一起,肩颈很容易一直处在紧绷状态。",
],
"利益证明": [
proof: [
"SKG 颈部按摩仪贴合后颈和肩颈两侧,把热敷感和揉按感带到真正紧的位置。",
"戴上后不用占手,工作间隙、居家放松、睡前都能快速进入舒缓节奏。",
],
"方案过渡": [
solution: [
"这一镜把原片的讲解节奏换成 SKG 使用步骤:拿起、佩戴、贴合、放松。",
"让产品自然进入画面,重点不是硬推,而是把肩颈紧绷到放松的变化拍清楚。",
],
"转化收口": [
cta: [
"如果你也想把肩颈放松变成日常习惯,可以先从这台 SKG 开始。",
"最后用清晰产品特写和轻松状态收住,让用户知道现在就可以入手。",
],
"节奏承接": [
bridge: [
"延续原片短句快节奏,把每一句都落到一个具体肩颈场景或产品动作。",
"这一句作为过渡,画面从痛点切到产品,让节奏继续往下走。",
],
}
const list = variants[role] ?? variants["节奏承接"]
const list = variants[role] ?? variants.bridge
return list[index % list.length]
}
function buildVisualPlan(role: string) {
if (role === "开场钩子") return "竖屏近景口播开场,人物轻揉脖子或转动肩颈,直接建立疲惫感。"
if (role === "痛点推进") return "沿用原视频的表情、手势和节奏,画面强调低头、久坐、肩颈紧绷。"
if (role === "利益证明") return "产品进入画面并佩戴到后颈,切到肩颈贴合、按键、热敷/揉按感的细节。"
if (role === "转化收口") return "产品清晰特写 + 人物放松表情收尾,保留信息流广告的快速行动感。"
/**
 * Returns the default English visual plan for a storyboard beat.
 * `hook`, `pain`, `proof`, and `cta` have dedicated plans; every other role
 * gets the generic "keep the source composition" plan.
 */
function buildVisualPlan(role: AudioStoryboardRole) {
  switch (role) {
    case "hook":
      return "Vertical close-up creator opening. The subject gently rubs the neck or rotates the shoulders to establish fatigue immediately."
    case "pain":
      return "Keep the source expression, gesture rhythm, and fast pacing while emphasizing phone posture, desk sitting, and neck-and-shoulder tension."
    case "proof":
      return "Bring the product into frame and place it around the back of the neck, then cut to fit, button, warmth, and kneading-comfort details."
    case "cta":
      return "End with a clean product detail plus a relaxed expression, keeping the quick action feeling of a feed ad."
    default:
      return "Keep the source-style composition and camera movement, but replace the content with an SKG neck-and-shoulder relaxation scene."
  }
}
/**
 * Returns the default Chinese visual plan for a storyboard beat.
 * `hook`, `pain`, `proof`, and `cta` have dedicated plans; every other role
 * gets the generic plan.
 */
function buildVisualPlanZh(role: AudioStoryboardRole) {
  switch (role) {
    case "hook":
      return "竖屏近景口播开场,人物轻揉脖子或转动肩颈,直接建立疲惫感。"
    case "pain":
      return "沿用原视频的表情、手势和节奏,画面强调低头、久坐、肩颈紧绷。"
    case "proof":
      return "产品进入画面并佩戴到后颈,切到肩颈贴合、按键、热敷/揉按感的细节。"
    case "cta":
      return "产品清晰特写 + 人物放松表情收尾,保留信息流广告的快速行动感。"
    default:
      return "保持原片同类构图和运镜,把画面内容替换成 SKG 肩颈放松场景。"
  }
}
function visualModeDefaults(mode: StoryboardVisualMode) {
function visualModeDefaults(mode: StoryboardVisualMode, language: "en" | "zh" = "en") {
if (mode === "person_only") {
return {
needsProduct: false,
needsSubject: true,
productPlacement: "本条不出现产品,只用人物状态、痛点或口播承接节奏;不要硬插 SKG 产品。",
productPlacement: language === "zh"
? "本条不出现产品,只用人物状态、痛点或口播承接节奏;不要硬插 SKG 产品。"
: "Do not show the product in this beat. Use the subject's state, pain point, or voice-over performance to carry the rhythm; do not force in the SKG product.",
}
}
if (mode === "product_only") {
return {
needsProduct: true,
needsSubject: false,
productPlacement: "只展示 SKG 肩颈按摩仪本体、佩戴角度或功能细节;不要强行加入人物。",
productPlacement: language === "zh"
? "只展示 SKG 肩颈按摩仪本体、佩戴角度或功能细节;不要强行加入人物。"
: "Show only the SKG neck-and-shoulder massager, wearing angle, or functional detail; do not force a main character into this beat.",
}
}
if (mode === "environment") {
return {
needsProduct: false,
needsSubject: false,
productPlacement: "本条作为场景/情绪/节奏过渡,不出现产品和人物主体;只保留空间、光线和运动节奏。",
productPlacement: language === "zh"
? "本条作为场景/情绪/节奏过渡,不出现产品和人物主体;只保留空间、光线和运动节奏。"
: "Use this beat as a scene, mood, or pacing transition. Do not show the product or main subject; keep only space, light, and motion rhythm.",
}
}
return {
needsProduct: true,
needsSubject: true,
productPlacement: "SKG 肩颈按摩仪作为外置佩戴产品出现,围绕拿起、佩戴、调整、按键或放松状态展开。",
productPlacement: language === "zh"
? "SKG 肩颈按摩仪作为外置佩戴产品出现,围绕拿起、佩戴、调整、按键或放松状态展开。"
: "Show the SKG neck-and-shoulder massager as an external wearable product, built around picking it up, wearing it, adjusting it, pressing controls, or relaxing with it.",
}
}
function visualModeForRole(role: string): StoryboardVisualMode {
if (role === "开场钩子" || role === "痛点推进") return "person_only"
if (role === "转化收口") return "product_only"
if (role === "节奏承接") return "environment"
/**
 * Maps a storyboard role to its default visual mode:
 * hook/pain → person only, cta → product only, bridge → environment,
 * anything else → person together with product.
 */
function visualModeForRole(role: AudioStoryboardRole): StoryboardVisualMode {
  switch (role) {
    case "hook":
    case "pain":
      return "person_only"
    case "cta":
      return "product_only"
    case "bridge":
      return "environment"
    default:
      return "person_product"
  }
}
function buildFirstFramePlan(role: string) {
if (role === "开场钩子") return "人物近景看向镜头或低头办公,手轻扶后颈,画面先不露产品。"
if (role === "痛点推进") return "保留原片人物动作节奏,肩颈紧绷、低头、揉脖子或久坐状态明确。"
if (role === "利益证明") return "人物拿起或准备佩戴 SKG 肩颈按摩仪,产品位置清晰但动作刚开始。"
if (role === "方案过渡") return "人物从痛点状态切到拿起产品/靠近肩颈,准备进入使用动作。"
if (role === "转化收口") return "产品干净特写或佩戴完成后的稳定画面,留出转化收口的视觉焦点。"
/**
 * Returns the default English first-frame plan for a storyboard beat.
 * Roles without a dedicated plan get the generic "start from the source
 * composition" text.
 */
function buildFirstFramePlan(role: AudioStoryboardRole) {
  switch (role) {
    case "hook":
      return "Close-up subject looking at camera or working with head down, one hand lightly touching the back of the neck, with no product visible yet."
    case "pain":
      return "Preserve the source action rhythm while making neck tension, looking down, neck rubbing, or desk-sitting posture clear."
    case "proof":
      return "The subject picks up or prepares to wear the SKG neck-and-shoulder massager; product position is clear but the action has just started."
    case "solution":
      return "Move from the pain state into picking up the product or bringing it toward the neck and shoulders, ready to begin use."
    case "cta":
      return "Clean product close-up or stable worn-product frame, leaving a strong visual focus for the conversion close."
    default:
      return "Start from the current source sentence's composition to carry the rhythm without forcing a subject change."
  }
}
/**
 * Returns the default Chinese first-frame plan for a storyboard beat.
 * Roles without a dedicated plan get the generic text.
 */
function buildFirstFramePlanZh(role: AudioStoryboardRole) {
  switch (role) {
    case "hook":
      return "人物近景看向镜头或低头办公,手轻扶后颈,画面先不露产品。"
    case "pain":
      return "保留原片人物动作节奏,肩颈紧绷、低头、揉脖子或久坐状态明确。"
    case "proof":
      return "人物拿起或准备佩戴 SKG 肩颈按摩仪,产品位置清晰但动作刚开始。"
    case "solution":
      return "人物从痛点状态切到拿起产品/靠近肩颈,准备进入使用动作。"
    case "cta":
      return "产品干净特写或佩戴完成后的稳定画面,留出转化收口的视觉焦点。"
    default:
      return "按原视频当前句的构图启动,先承接节奏,不强行改变镜头主体。"
  }
}
function buildLastFramePlan(role: string) {
if (role === "开场钩子") return "人物抬头或表情更集中,给下一镜产品或方案进入留出空间。"
if (role === "痛点推进") return "紧绷状态被放大到一个明确停点,准备切入产品解决方案。"
if (role === "利益证明") return "产品已正确佩戴在后颈/肩颈位置,人物放松,产品比例稳定。"
if (role === "方案过渡") return "产品贴合肩颈,手部调整完成,画面自然进入功能细节或放松状态。"
if (role === "转化收口") return "产品或佩戴状态稳定收住,画面干净,适合后续接购买/行动号召。"
/**
 * Returns the default English last-frame plan for a storyboard beat.
 * Roles without a dedicated plan get the generic "hold a stable endpoint" text.
 */
function buildLastFramePlan(role: AudioStoryboardRole) {
  switch (role) {
    case "hook":
      return "The subject lifts the head or becomes more focused, leaving room for the product or solution to enter in the next beat."
    case "pain":
      return "Amplify the tense state into a clear stopping point, ready to cut into the product solution."
    case "proof":
      return "The product is correctly worn around the back of the neck and shoulders, the subject looks more relaxed, and product scale is stable."
    case "solution":
      return "The product fits against the neck and shoulders, hand adjustment is complete, and the frame can move into functional detail or relaxation."
    case "cta":
      return "Hold a stable product or worn-product frame with clean composition, ready for purchase or action-call continuation."
    default:
      return "Advance the action slightly and hold a stable endpoint that connects naturally to the next sentence."
  }
}
/**
 * Returns the default Chinese last-frame plan for a storyboard beat.
 * Roles without a dedicated plan get the generic text.
 */
function buildLastFramePlanZh(role: AudioStoryboardRole) {
  switch (role) {
    case "hook":
      return "人物抬头或表情更集中,给下一镜产品或方案进入留出空间。"
    case "pain":
      return "紧绷状态被放大到一个明确停点,准备切入产品解决方案。"
    case "proof":
      return "产品已正确佩戴在后颈/肩颈位置,人物放松,产品比例稳定。"
    case "solution":
      return "产品贴合肩颈,手部调整完成,画面自然进入功能细节或放松状态。"
    case "cta":
      return "产品或佩戴状态稳定收住,画面干净,适合后续接购买/行动号召。"
    default:
      return "动作小幅推进并稳定停住,保留与下一句衔接的方向感。"
  }
}
function buildSubjectDescription(role: string, visualMode: StoryboardVisualMode) {
/**
 * Builds the default English subject description for a storyboard beat.
 *
 * Beats whose visual mode shows no person (`product_only`, `environment`) get an
 * empty string. Otherwise the result is a shared base description of the subject
 * followed by one role-specific sentence, separated by a single space.
 *
 * @param role - Narrative role of the beat; selects the role-specific sentence.
 * @param visualMode - Visual mode of the beat; decides whether a subject is described at all.
 */
function buildSubjectDescription(role: AudioStoryboardRole, visualMode: StoryboardVisualMode) {
  const hasNoSubject = visualMode === "product_only" || visualMode === "environment"
  if (hasNoSubject) return ""

  const base = "Consistent similar subject: a friendly transparent or semi-transparent humanoid with visible clean white skeleton inside, commercial not horror, with neck, collarbone, and upper-back areas clear for wearing a neck-and-shoulder massager."

  let detail: string
  switch (role) {
    case "hook":
      detail = "Front or upper-body creator speaking state, with a pain-point or curious expression that grabs attention quickly."
      break
    case "pain":
      detail = "Neck-and-shoulder tension, looking down, desk posture, or rubbing the neck; make the neck line, shoulders, and upper back readable."
      break
    case "proof":
      detail = "Relaxed state while wearing or about to wear the product, prioritizing neck-and-shoulder close-up, side, and back-neck angles."
      break
    case "solution":
      detail = "Hands adjust the product or show wearable fit naturally; product placement must not hide important anatomy or device structure."
      break
    case "cta":
      detail = "Stable, relaxed, clean ending state using front, three-quarter, or stable worn-product framing."
      break
    default:
      detail = "Keep one consistent subject identity, material, body type, gender presentation, and commercial mood across the whole video."
  }
  return `${base} ${detail}`
}
function buildSubjectDescriptionZh(role: AudioStoryboardRole, visualMode: StoryboardVisualMode) {
if (visualMode === "product_only" || visualMode === "environment") return ""
const base = "统一相似主体:透明或半透明皮肤包裹可见白色骨架的人形,广告感、非恐怖、肩颈/锁骨/上背区域清晰,适合佩戴肩颈按摩仪。"
if (role === "开场钩子") return `${base} 正面或半身口播状态,表情有痛点或好奇感,能快速抓住注意。`
if (role === "痛点推进") return `${base} 肩颈紧绷、低头久坐或按揉脖子的状态,重点看清脖子、肩线和上背。`
if (role === "利益证明") return `${base} 产品佩戴或即将佩戴的放松状态,优先肩颈近景、侧面和后颈肩背角度。`
if (role === "方案过渡") return `${base} 手部调整产品或展示佩戴贴合感,人物姿态自然,产品位置不能挡住关键结构。`
if (role === "转化收口") return `${base} 状态稳定、放松、干净收尾,可用正面/三分之二视角或产品佩戴后的稳定状态。`
if (role === "hook") return `${base} 正面或半身口播状态,表情有痛点或好奇感,能快速抓住注意。`
if (role === "pain") return `${base} 肩颈紧绷、低头久坐或按揉脖子的状态,重点看清脖子、肩线和上背。`
if (role === "proof") return `${base} 产品佩戴或即将佩戴的放松状态,优先肩颈近景、侧面和后颈肩背角度。`
if (role === "solution") return `${base} 手部调整产品或展示佩戴贴合感,人物姿态自然,产品位置不能挡住关键结构。`
if (role === "cta") return `${base} 状态稳定、放松、干净收尾,可用正面/三分之二视角或产品佩戴后的稳定状态。`
return `${base} 保持与整片一致的主体身份、材质、体型、性别表现和广告气质。`
}
function buildAudioStoryboardRows(job: Job | null): AudioStoryboardRow[] {
if (!job?.transcript.length) return []
return job.transcript.map((segment, index) => {
const source = segment.zh?.trim() || segment.en?.trim() || "原音频文案待补充"
const source = segment.en?.trim() || segment.zh?.trim() || "Source audio script pending."
const sourceZh = segment.zh?.trim() || segment.en?.trim() || "原音频文案待补充"
const role = classifyAudioRole(`${segment.en} ${segment.zh}`, index, job.transcript.length)
const visualMode = visualModeForRole(role)
const defaults = visualModeDefaults(visualMode)
const defaultsZh = visualModeDefaults(visualMode, "zh")
const keyElements = role === "proof"
? "wearing action, product position, hand pressing the control, relaxed expression"
: "creator framing, subject gesture, facial rhythm, scene lighting"
const keyElementsZh = role === "proof"
? "佩戴动作、产品位置、手部按键、放松表情"
: "口播构图、人物动作、表情节奏、场景光线"
return {
index: segment.index,
start: segment.start,
end: segment.end,
source,
sourceZh,
role,
visualMode,
needsProduct: defaults.needsProduct,
needsSubject: defaults.needsSubject,
subjectDescription: buildSubjectDescription(role, visualMode),
subjectDescriptionZh: buildSubjectDescriptionZh(role, visualMode),
skgCopy: buildSkgCopy(role, index),
skgCopyZh: buildSkgCopyZh(role, index),
visualPlan: buildVisualPlan(role),
visualPlanZh: buildVisualPlanZh(role),
firstFramePlan: buildFirstFramePlan(role),
firstFramePlanZh: buildFirstFramePlanZh(role),
lastFramePlan: buildLastFramePlan(role),
referencePlan: `从原视频 ${segment.start.toFixed(1)}-${segment.end.toFixed(1)}s 定向抽 1-2 张参考帧。`,
keyElements: role === "利益证明" ? "佩戴动作、产品位置、手部按键、放松表情" : "口播构图、人物动作、表情节奏、场景光线",
productIntegration: "把原片产品/道具语境替换为 SKG 白色 U 形颈部按摩仪,产品必须外置佩戴在肩颈位置。",
lastFramePlanZh: buildLastFramePlanZh(role),
referencePlan: `Extract 1-2 targeted reference frames from source video ${segment.start.toFixed(1)}-${segment.end.toFixed(1)}s.`,
keyElements,
keyElementsZh,
productIntegration: "Replace the source product or prop context with the SKG white U-shaped neck-and-shoulder massager. The product must be worn externally around the neck and shoulders.",
productIntegrationZh: "把原片产品/道具语境替换为 SKG 白色 U 形颈部按摩仪,产品必须外置佩戴在肩颈位置。",
productPlacement: defaults.productPlacement,
productPlacementZh: defaultsZh.productPlacement,
}
})
}
@@ -1173,14 +1347,14 @@ function productReferenceNotes(items: ProductRefItem[]) {
if (!items.length) return ""
return items
.map((item, index) => {
const tags = item.useTags.map((tag) => PRODUCT_USE_TAG_LABELS[tag]).filter(Boolean).join("/")
const tags = item.useTags.map((tag) => PRODUCT_USE_TAG_PROMPT_LABELS[tag] ?? tag).filter(Boolean).join(", ")
const orientation = formatProductOrientation(item.orientation)
const direction = orientation ? `;方向:${orientation}` : ""
const landmarks = item.landmarks.length ? `;结构:${item.landmarks.join("/")}` : ""
const risk = item.risk ? `;风险:${item.risk}` : ""
return `${index + 1}. ${productViewLabel(item.view)}${productBackgroundLabel(item.background)}${tags}${item.note || "无补充备注"}${direction}${landmarks}${risk}`
const direction = orientation ? `; orientation: ${orientation}` : ""
const landmarks = item.landmarks.length ? `; structural landmarks: ${item.landmarks.join(", ")}` : ""
const risk = item.risk ? `; risk: ${item.risk}` : ""
return `${index + 1}. ${PRODUCT_VIEW_PROMPT_LABELS[item.view] ?? item.view} | ${PRODUCT_BACKGROUND_PROMPT_LABELS[item.background] ?? item.background} | ${tags || "general product reference"}: ${item.note || "no extra note"}${direction}${landmarks}${risk}`
})
.join("")
.join("; ")
}
function savedScenePatch(scene?: StoryboardScene | null): RowPlanPatch {
@@ -1189,11 +1363,11 @@ function savedScenePatch(scene?: StoryboardScene | null): RowPlanPatch {
visualMode: scene.visual_mode,
needsProduct: scene.needs_product,
needsSubject: scene.needs_subject,
subjectDescription: scene.subject?.split("\n").find((line) => line.trim() && !line.startsWith("主体真源") && !line.startsWith("本条不需要"))?.trim(),
visualPlan: scene.scene?.split("\n").find((line) => line.trim() && !line.startsWith("镜头类型") && !line.startsWith("首帧规划") && !line.startsWith("尾帧规划") && !line.startsWith("原音频依据"))?.trim(),
subjectDescription: scene.subject?.split("\n").find((line) => line.trim() && !line.startsWith("Subject source") && !line.startsWith("No main subject") && !line.startsWith("主体真源") && !line.startsWith("本条不需要"))?.trim(),
visualPlan: scene.scene?.split("\n").find((line) => line.trim() && !line.startsWith("Visual mode") && !line.startsWith("First-frame plan") && !line.startsWith("Last-frame plan") && !line.startsWith("Source audio reference") && !line.startsWith("镜头类型") && !line.startsWith("首帧规划") && !line.startsWith("尾帧规划") && !line.startsWith("原音频依据"))?.trim(),
firstFramePlan: scene.first_frame_plan,
lastFramePlan: scene.last_frame_plan,
productIntegration: scene.product?.split("\n").find((line) => line.trim() && !line.startsWith("产品需求") && !line.startsWith("产品出现方式") && !line.startsWith("产品素材池") && !line.startsWith("未上传产品图") && !line.startsWith("本条规划"))?.trim(),
productIntegration: scene.product?.split("\n").find((line) => line.trim() && !line.startsWith("Product requirement") && !line.startsWith("Product placement") && !line.startsWith("Product reference pool") && !line.startsWith("No product") && !line.startsWith("This beat") && !line.startsWith("产品需求") && !line.startsWith("产品出现方式") && !line.startsWith("产品素材池") && !line.startsWith("未上传产品图") && !line.startsWith("本条规划"))?.trim(),
productPlacement: scene.product_placement,
}
}
@@ -1206,34 +1380,40 @@ function applyPlanPatch(row: AudioStoryboardRow, patch?: RowPlanPatch): AudioSto
needsProduct: patch.needsProduct ?? row.needsProduct,
needsSubject: patch.needsSubject ?? row.needsSubject,
subjectDescription: patch.subjectDescription ?? row.subjectDescription,
subjectDescriptionZh: patch.subjectDescriptionZh ?? row.subjectDescriptionZh,
visualPlan: patch.visualPlan ?? row.visualPlan,
visualPlanZh: patch.visualPlanZh ?? row.visualPlanZh,
firstFramePlan: patch.firstFramePlan ?? row.firstFramePlan,
firstFramePlanZh: patch.firstFramePlanZh ?? row.firstFramePlanZh,
lastFramePlan: patch.lastFramePlan ?? row.lastFramePlan,
lastFramePlanZh: patch.lastFramePlanZh ?? row.lastFramePlanZh,
productIntegration: patch.productIntegration ?? row.productIntegration,
productIntegrationZh: patch.productIntegrationZh ?? row.productIntegrationZh,
productPlacement: patch.productPlacement ?? row.productPlacement,
productPlacementZh: patch.productPlacementZh ?? row.productPlacementZh,
}
}
function productPriorityForRow(row: AudioStoryboardRow) {
const viewPriorityByRole: Record<string, string[]> = {
"开场钩子": ["front", "left_45", "right_45", "side_thickness"],
"痛点推进": ["front", "side_thickness", "left_45", "right_45"],
"利益证明": ["inner_contacts", "side_thickness", "front", "left_45", "right_45", "back_bottom"],
"方案过渡": ["front", "left_45", "right_45", "inner_contacts", "side_thickness"],
"转化收口": ["front", "back_bottom", "left_45", "right_45", "inner_contacts"],
"节奏承接": ["front", "left_45", "right_45", "side_thickness"],
const viewPriorityByRole: Record<AudioStoryboardRole, string[]> = {
hook: ["front", "left_45", "right_45", "side_thickness"],
pain: ["front", "side_thickness", "left_45", "right_45"],
proof: ["inner_contacts", "side_thickness", "front", "left_45", "right_45", "back_bottom"],
solution: ["front", "left_45", "right_45", "inner_contacts", "side_thickness"],
cta: ["front", "back_bottom", "left_45", "right_45", "inner_contacts"],
bridge: ["front", "left_45", "right_45", "side_thickness"],
}
const tagPriorityByRole: Record<string, string[]> = {
"开场钩子": ["hero_packshot", "asymmetry", "side_thickness"],
"痛点推进": ["wearing_scale", "side_thickness", "hero_packshot"],
"利益证明": ["inner_contact", "wearing_scale", "button_detail", "side_thickness"],
"方案过渡": ["wearing_scale", "hero_packshot", "inner_contact"],
"转化收口": ["hero_packshot", "back_bottom", "asymmetry", "material_texture"],
"节奏承接": ["hero_packshot", "asymmetry", "side_thickness"],
const tagPriorityByRole: Record<AudioStoryboardRole, string[]> = {
hook: ["hero_packshot", "asymmetry", "side_thickness"],
pain: ["wearing_scale", "side_thickness", "hero_packshot"],
proof: ["inner_contact", "wearing_scale", "button_detail", "side_thickness"],
solution: ["wearing_scale", "hero_packshot", "inner_contact"],
cta: ["hero_packshot", "back_bottom", "asymmetry", "material_texture"],
bridge: ["hero_packshot", "asymmetry", "side_thickness"],
}
return {
views: viewPriorityByRole[row.role] ?? viewPriorityByRole["节奏承接"],
tags: tagPriorityByRole[row.role] ?? tagPriorityByRole["节奏承接"],
views: viewPriorityByRole[row.role] ?? viewPriorityByRole.bridge,
tags: tagPriorityByRole[row.role] ?? tagPriorityByRole.bridge,
}
}
@@ -1245,17 +1425,17 @@ function endpointProductPriority(row: AudioStoryboardRow, role?: "first_frame" |
if (!views.includes(view)) views.push(view)
if (tag && !tags.includes(tag)) tags.push(tag)
}
if (/后颈|肩背|背面|背部|后背|上背|尾帧|佩戴完成|贴合完成/.test(text)) add("back_bottom", "back_bottom")
if (/侧面|侧身|厚度|侧厚|体积|左侧|右侧|45|调整|拿起|靠近肩颈/.test(text)) add("side_thickness", "side_thickness")
if (/内侧|触点|按摩头|贴颈|接触|皮肤接触/.test(text)) add("inner_contacts", "inner_contact")
if (/佩戴比例|上身|真人佩戴|脖子|肩颈|锁骨/.test(text)) add("left_45", "wearing_scale")
if (/按键|按钮|控制|开关|logo/.test(text)) add("right_45", "button_detail")
if (/back neck|neck back|upper back|back view|back side|shoulder blade|last frame|worn|wearing complete|fit complete|后颈|肩背|背面|背部|后背|上背|尾帧|佩戴完成|贴合完成/.test(text)) add("back_bottom", "back_bottom")
if (/side|profile|thickness|volume|left side|right side|45|adjust|pick up|bring.*neck|toward.*shoulder|侧面|侧身|厚度|侧厚|体积|左侧|右侧|调整|拿起|靠近肩颈/.test(text)) add("side_thickness", "side_thickness")
if (/inner|contact pad|massage head|touching skin|neck contact|skin contact|内侧|触点|按摩头|贴颈|接触|皮肤接触/.test(text)) add("inner_contacts", "inner_contact")
if (/wearing scale|upper body|worn on human|neck|shoulder|collarbone|佩戴比例|上身|真人佩戴|脖子|肩颈|锁骨/.test(text)) add("left_45", "wearing_scale")
if (/button|control|switch|logo|按键|按钮|控制|开关/.test(text)) add("right_45", "button_detail")
return { views, tags }
}
function endpointProductMaxForRow(row: AudioStoryboardRow, role?: "first_frame" | "last_frame") {
const text = `${row.visualPlan} ${row.firstFramePlan} ${row.lastFramePlan} ${row.productIntegration} ${row.productPlacement} ${role ?? ""}`.toLowerCase()
return /侧面|侧身|厚度|侧厚|后颈|肩背|背面|背部|内侧|触点|按摩头|贴颈|特写|近景|按键|按钮|佩戴完成|上背/.test(text)
return /side|profile|thickness|back neck|upper back|back view|inner|contact pad|massage head|neck contact|close-up|closeup|button|control|worn|wearing complete|侧面|侧身|厚度|侧厚|后颈|肩背|背面|背部|内侧|触点|按摩头|贴颈|特写|近景|按键|按钮|佩戴完成|上背/.test(text)
? MAX_PRODUCT_REFS_PER_ENDPOINT
: 1
}
@@ -1336,26 +1516,42 @@ function subjectViewRoleHint(view: string) {
return hints[view] ?? "主体参考视角"
}
/**
 * Returns the English prompt hint describing what a subject reference view is good for.
 *
 * Only the view ids listed in the table are recognised; any other string yields
 * the generic `"subject reference view"` hint. The lookup checks own properties
 * explicitly, so ids that collide with `Object.prototype` members (for example
 * `"constructor"` or `"toString"`) also get the generic hint instead of an
 * inherited non-string value.
 *
 * @param view - Subject view id, e.g. `"front"` or `"back_neck_detail"`.
 * @returns The hint text for the view, or the generic fallback.
 */
function subjectViewPromptHint(view: string) {
  const hints: Record<string, string> = {
    front: "front speaking shot, opening hook, expression, conversion close",
    three_quarter_left: "left three-quarter angle, talking, pre-wear motion, natural turn",
    three_quarter_right: "right three-quarter angle, talking, pre-wear motion, natural turn",
    left: "left side, neck-and-shoulder side profile, wearing action, product thickness and position",
    right: "right side, neck-and-shoulder side profile, wearing action, product thickness and position",
    back: "back view, back neck and upper shoulders, product placement landing",
    bust_front: "front neck-and-shoulder close-up, pain-point expression, wearing scale",
    bust_left_45: "left three-quarter neck-and-shoulder close-up, hand adjustment, wearable fit",
    bust_right_45: "right three-quarter neck-and-shoulder close-up, hand adjustment, wearable fit",
    back_neck_detail: "back-neck and upper-back detail, contact-pad position, product fit",
  }
  // Own-property check: a bare `hints[view]` would also resolve inherited prototype members.
  const isKnownView = Object.prototype.hasOwnProperty.call(hints, view)
  return isKnownView ? hints[view] : "subject reference view"
}
function subjectDescriptionForRow(row: AudioStoryboardRow, subjectRefs: SubjectPlanningRef[]) {
const trimmed = row.subjectDescription.trim()
if (trimmed) return trimmed
const labels = subjectRefs.slice(0, 4).map((ref) => ref.label || subjectViewLabel(ref.view)).join("")
const labels = subjectRefs.slice(0, 4).map((ref) => ref.label || subjectViewLabel(ref.view)).join(", ")
return [
"统一相似主体:使用已生成的主体视图作为人物真源,保持同一人物身份、体型、材质、年龄段、性别表现和广告气质。",
labels ? `可用主体视角:${labels}` : "",
"如果本条需要人物但缺少更具体描述,默认保持透明皮肤包裹白色骨架、非恐怖、肩颈区域清晰可佩戴产品。",
"Consistent similar subject: use the generated subject view pack as the character truth, maintaining one identity, body proportion, material, age range, gender presentation, and commercial mood.",
labels ? `Available subject views: ${labels}.` : "",
"If this beat needs a subject but lacks a specific description, default to a friendly transparent skin shell with visible white skeleton, non-horror, with clear neck and shoulder area for wearable product placement.",
].filter(Boolean).join("")
}
function subjectPriorityForRow(row: AudioStoryboardRow, role?: "first_frame" | "last_frame") {
const text = `${row.role} ${row.visualMode} ${row.subjectDescription} ${row.visualPlan} ${row.firstFramePlan} ${row.lastFramePlan} ${row.productPlacement}`.toLowerCase()
if (/后颈|肩背|上背|背面|背部|贴合|佩戴完成|已正确佩戴/.test(text)) {
if (/back neck|upper back|shoulder blade|back view|fit|worn|wearing complete|correctly worn|后颈|肩背|上背|背面|背部|贴合|佩戴完成|已正确佩戴/.test(text)) {
return ["back_neck_detail", "back", "bust_left_45", "bust_right_45", "left", "right", "bust_front", "three_quarter_left", "three_quarter_right", "front"]
}
if (/侧面|左侧|右侧|45|调整|拿起|准备佩戴|靠近肩颈|手部/.test(text)) {
if (/side|left|right|45|adjust|pick up|prepare to wear|toward.*neck|hand|侧面|左侧|右侧|调整|拿起|准备佩戴|靠近肩颈|手部/.test(text)) {
return ["bust_left_45", "bust_right_45", "left", "right", "three_quarter_left", "three_quarter_right", "bust_front", "front", "back_neck_detail", "back"]
}
if (/近景|半身|肩颈|锁骨|脖子|揉脖子|低头|紧绷/.test(text)) {
if (/close-up|closeup|upper-body|bust|neck|shoulder|collarbone|rubbing.*neck|looking down|tense|tension|近景|半身|肩颈|锁骨|脖子|揉脖子|低头|紧绷/.test(text)) {
return ["bust_front", "bust_left_45", "bust_right_45", "front", "three_quarter_left", "three_quarter_right", "left", "right", "back_neck_detail", "back"]
}
if (role === "last_frame" && row.needsProduct) {
@@ -1371,8 +1567,8 @@ function selectSubjectRefsForRow(row: AudioStoryboardRow, refs: SubjectPlanningR
.map((ref, index) => {
const rank = priority.indexOf(ref.view)
const labelText = `${ref.label || ""} ${ref.roleHint}`.toLowerCase()
const closeupScore = /肩颈|后颈|近景|贴合|佩戴/.test(row.visualPlan + row.firstFramePlan + row.lastFramePlan + row.productPlacement)
&& /bust|neck|近景|肩颈|后颈/.test(`${ref.view} ${labelText}`)
const closeupScore = /neck|shoulder|back neck|close-up|closeup|fit|wear|佩戴|肩颈|后颈|近景|贴合/.test(row.visualPlan + row.firstFramePlan + row.lastFramePlan + row.productPlacement)
&& /bust|neck|close-up|closeup|近景|肩颈|后颈/.test(`${ref.view} ${labelText}`)
? 12
: 0
return { ref, score: (rank >= 0 ? 100 - rank * 8 : 0) + closeupScore - index }
@@ -1383,7 +1579,7 @@ function selectSubjectRefsForRow(row: AudioStoryboardRow, refs: SubjectPlanningR
}
/**
 * Formats subject references as a single numbered note string.
 *
 * Each entry is rendered as `"<n>. <label> | <view prompt hint>"`, where the
 * label falls back to `subjectViewLabel(ref.view)` when `ref.label` is empty,
 * and entries are separated by `"; "`.
 *
 * @param refs - Subject references to describe, in display order.
 * @returns The joined notes, or an empty string when `refs` is empty.
 */
function subjectReferenceNotes(refs: SubjectPlanningRef[]) {
  // Exactly one return: a stale earlier return (label and roleHint concatenated
  // with no separator) previously made this formatted variant unreachable.
  return refs
    .map((ref, index) => `${index + 1}. ${ref.label || subjectViewLabel(ref.view)} | ${subjectViewPromptHint(ref.view)}`)
    .join("; ")
}
function subjectAssetRefsForPlanning(source: { frame: KeyFrame; element: KeyElement } | null): SubjectPlanningRef[] {
@@ -1432,19 +1628,20 @@ function buildEndpointFramePrompt(row: AudioStoryboardRow, role: "first_frame" |
const opposite = role === "first_frame" ? row.lastFramePlan : row.firstFramePlan
const productNotes = selectedProductItems.length ? productReferenceNotes(selectedProductItems) : ""
return [
`分镜 ${row.index + 1} ${role === "first_frame" ? "首帧" : "尾帧"}`,
`新口播文案:${row.skgCopy}`,
`镜头类型:${VISUAL_MODE_OPTIONS.find((item) => item.value === row.visualMode)?.label ?? row.visualMode}`,
`当前要生成的画面:${target}`,
`另一端画面用于连续性参考:${opposite}`,
`画面规划:${row.visualPlan}`,
`Storyboard beat ${row.index + 1}, ${role === "first_frame" ? "first frame" : "last frame"}.`,
`New English voice-over line: ${row.skgCopy}`,
`Narrative role: ${ROLE_LABELS_EN[row.role]}.`,
`Visual mode: ${row.visualMode}.`,
`Target endpoint frame to generate now: ${target}`,
`Opposite endpoint continuity reference: ${opposite}`,
`Overall visual plan: ${row.visualPlan}`,
row.needsSubject
? `人物主体 brief${subjectBrief || "主体 brief 暂缺,请保持一个统一的商业广告主体,肩颈区域清晰可佩戴产品。"}。主体只依据这段文字身份描述,不上传主体参考图;可以根据本镜头自由改变动作、景别、表情和环境,但不能换成另一个人设。不要回到原视频关键帧复刻人物。`
: "本条不需要主角人物;如出现人物,只能是局部手部、背影或环境人物,不要生成透明骨架主角。",
? `Subject identity brief: ${subjectBrief || "Subject brief is missing. Keep one unified commercial ad subject with clear neck-and-shoulder area for product placement."}. Use only this text identity brief; no subject reference image is uploaded. The subject may freely change pose, framing, expression, gesture, and environment for this shot, but must not become a different character. Do not copy the original source-video person or keyframe.`
: "This beat does not need a main character. If people appear, they should only be partial hands, back-view background figures, or environmental figures; do not generate the transparent skeleton main subject.",
row.needsProduct
? `产品融入:${row.productPlacement}${row.productIntegration}。本次只提供 ${selectedProductItems.length} 张同一 SKG 肩颈按摩仪产品硬参考;${productNotes}。产品是套在脖子上的 U 形肩颈按摩仪,必须保持真实佩戴大小、左右非对称、按键、触点、厚度和贴颈位置。`
: "本条不露出产品,不要强行生成 SKG 产品、包装、白底图或随机商品。",
"输出一张单独的 9:16 高清首/尾帧,不要拼图,不要字幕,不要平台 UI不要水印。画面要能作为后续视频生成的明确起止帧。",
? `Product integration: ${row.productPlacement}. ${row.productIntegration}. This request provides ${selectedProductItems.length} rigid reference image(s) of the same SKG neck-and-shoulder massager: ${productNotes}. The product is a U-shaped wearable device worn around the neck and shoulders. Preserve realistic wearable scale, left-right asymmetry, button placement, contact pads, side thickness, and neck-contact position.`
: "Do not show the product in this beat. Do not force-generate an SKG product, package, white-background product image, or random merchandise.",
"Output one single 9:16 high-definition endpoint frame. No contact sheet, no multiple views, no subtitles, no platform UI, no watermark. The image must work as a clear first/last frame for downstream video generation.",
].join("\n")
}
@@ -1462,10 +1659,10 @@ function buildStoryboardSceneFromAudioRow(
const subjectNotes = subjectReferenceNotes(subjectRefs)
const subjectBrief = subjectBriefForEndpoint(row, subjectRefs)
const productGuidance = !row.needsProduct
? "本条规划为不露出产品或不把产品作为画面主体;视频生成时不要硬插 SKG 产品、包装、白底图或错误商品。"
? "This beat is planned without product visibility or without product as the visual subject. Do not force-insert an SKG product, package, white-background product render, or incorrect merchandise during video generation."
: productItems.length
? `产品素材池共有 ${productItems.length} 张,本条只选用 ${selectedProductItems.length} 张最相关参考图,不要把未选素材混入本条画面。产品硬定义:这是套在脖子上的 U 形肩颈按摩仪,不是耳机、头戴设备或护颈枕。坐标系硬规则:左/右按佩戴者身体左右,不能按图片左右;上=靠近下巴/脸/颈部上沿,下=靠近锁骨/肩部下沿;内侧=贴颈皮肤/按摩触点,外侧=外壳/按键/Logo。所选图片只作为产品结构、角度、比例和细节参考不要照搬参考图的白底/黑底/棚拍背景。视角标注:${notes}。保留左右非对称细节,不要把两边做成镜像对称;肩颈产品大小必须贴近真实佩戴比例,不能缩成耳机,也不能放大成护颈枕。`
: "未上传产品图时使用默认 SKG 产品图;生成前建议先建立同一产品素材池,锁定左右差异、厚度和佩戴比例。"
? `The product pool has ${productItems.length} image(s); this beat selects only the ${selectedProductItems.length} most relevant reference image(s). Do not mix unselected assets into this shot. Rigid product definition: this is a U-shaped neck-and-shoulder wearable massager, not headphones, a headset, or a neck pillow. Coordinate rule: left/right refer to the wearer's body, not the image; top means closer to chin/face/upper neck, bottom means closer to collarbone/shoulders; inner means skin-contact side and massage pads, outer means shell/buttons/logo. Selected images are only product structure, angle, scale, and detail references; do not copy the white/black/studio background. View notes: ${notes}. Preserve left-right asymmetry; do not mirror the two sides. The shoulder-neck product size must match realistic wearing scale, not earphone-small and not neck-pillow-large.`
: "No product images are uploaded. Use the default SKG product concept only if needed, and preferably establish a same-product pool before generation to lock left-right differences, thickness, and wearing scale."
return {
duration: Number(Math.max(3.2, Math.min(6.5, row.end - row.start || 4.5)).toFixed(1)),
first_image: endpointRefs.firstImage ?? null,
@@ -1482,11 +1679,11 @@ function buildStoryboardSceneFromAudioRow(
subject_images: row.needsSubject ? subjectRefs : [],
subject_image: row.needsSubject ? subjectRefs[0] ?? null : null,
subject: row.needsSubject
? `${subjectDescription}\n主体动作/画面要素:${row.keyElements}\n主体真源:从已生成的相似主体白底视图中按本镜头需求选择 ${subjectRefs.length} 张;${subjectNotes}。关键帧只用于前置主体提取,不作为后续视频首尾帧参考。`
: "本条不需要人物主体或相似主体参考;如画面里出现人物,只作为背景或局部,不作为主角。",
scene: `镜头类型:${VISUAL_MODE_OPTIONS.find((item) => item.value === row.visualMode)?.label ?? row.visualMode}\n${row.visualPlan}\n首帧规划:${row.firstFramePlan}\n尾帧规划:${row.lastFramePlan}\n原音频依据:${row.source}`,
product: `产品需求:${row.needsProduct ? "需要产品参考" : "本条不需要产品"}\n产品出现方式${row.productPlacement}\n${row.needsProduct ? row.productIntegration : "本条以情绪、人物状态、空间或节奏过渡为主,不露出产品。"}\n${productGuidance}`,
action: `${row.skgCopy}\n连续动作:从首帧规划自然过渡到尾帧规划,镜头类型和产品/人物需求不能中途改变。`,
? `${subjectDescription}\nSubject action and visual elements: ${row.keyElements}\nSubject source: select ${subjectRefs.length} generated similar-subject view(s) according to this shot's need; ${subjectNotes}. Source keyframes are only used for upstream subject extraction and must not be used as direct endpoint-frame references.`
: "No main character or similar-subject reference is needed for this beat. If people appear, they should be background or partial-body context, not the main subject.",
scene: `Visual mode: ${row.visualMode}\n${row.visualPlan}\nFirst-frame plan: ${row.firstFramePlan}\nLast-frame plan: ${row.lastFramePlan}\nSource audio reference: ${row.source}`,
product: `Product requirement: ${row.needsProduct ? "product reference required" : "no product required for this beat"}\nProduct placement: ${row.productPlacement}\n${row.needsProduct ? row.productIntegration : "This beat focuses on emotion, subject state, space, or pacing transition and should not show the product."}\n${productGuidance}`,
action: `${row.skgCopy}\nContinuity action: transition naturally from the first-frame plan to the last-frame plan. The visual mode and product/subject requirements must not change mid-clip.`,
reference_ids: [],
}
}
@@ -3103,8 +3300,10 @@ function AudioStoryboardPlanPanel({
const [productAnalyzing, setProductAnalyzing] = useState(false)
const [productAngleBusy, setProductAngleBusy] = useState<string | null>(null)
const [copyOverrides, setCopyOverrides] = useState<Record<number, string>>({})
const [copyZhOverrides, setCopyZhOverrides] = useState<Record<number, string>>({})
const [planOverrides, setPlanOverrides] = useState<Record<number, RowPlanPatch>>({})
const [authorIntent, setAuthorIntent] = useState("")
const [showChineseMirror, setShowChineseMirror] = useState(true)
const [scriptRewriteBusy, setScriptRewriteBusy] = useState<"all" | number | null>(null)
const productFileRef = useRef<HTMLInputElement | null>(null)
const productPersistSeq = useRef(0)
@@ -3146,6 +3345,7 @@ function AudioStoryboardPlanPanel({
}
// Voice-over copy shown for a row: the entry stored in `copyOverrides` under
// the row's index wins; otherwise the row's own `skgCopy` is used.
const copyForRow = (row: AudioStoryboardRow) => {
  const override = copyOverrides[row.index]
  return override ?? row.skgCopy
}
// Chinese mirror of the row copy: the entry stored in `copyZhOverrides` under
// the row's index wins; otherwise the row's own `skgCopyZh` is used.
const copyZhForRow = (row: AudioStoryboardRow) => {
  const overrideZh = copyZhOverrides[row.index]
  return overrideZh ?? row.skgCopyZh
}
const patchRowCopy = (rowIndex: number, value: string) => {
setCopyOverrides((prev) => ({ ...prev, [rowIndex]: value }))
@@ -3163,7 +3363,9 @@ function AudioStoryboardPlanPanel({
needsProduct: defaults.needsProduct,
needsSubject: defaults.needsSubject,
subjectDescription: row ? buildSubjectDescription(row.role, mode) : "",
subjectDescriptionZh: row ? buildSubjectDescriptionZh(row.role, mode) : "",
productPlacement: defaults.productPlacement,
productPlacementZh: visualModeDefaults(mode, "zh").productPlacement,
})
}
@@ -3328,7 +3530,7 @@ function AudioStoryboardPlanPanel({
await analyzeAndCompleteProductViews(productItems.map((item) => item.ref))
}
const applyScriptRewriteItems = (items: Array<{ index: number; text: string }>) => {
const applyScriptRewriteItems = (items: Array<{ index: number; text: string; text_zh?: string }>) => {
if (!items.length) return
setCopyOverrides((prev) => {
const next = { ...prev }
@@ -3337,6 +3539,13 @@ function AudioStoryboardPlanPanel({
}
return next
})
setCopyZhOverrides((prev) => {
const next = { ...prev }
for (const item of items) {
if (item.text_zh?.trim()) next[item.index] = item.text_zh.trim()
}
return next
})
}
const rewriteSingleRow = async (row: AudioStoryboardRow) => {
@@ -3426,13 +3635,16 @@ function AudioStoryboardPlanPanel({
setEndpointFrameBusy(busyKey)
try {
await saveRowStoryboardDraft(plannedRow, frame)
const rawPrompt = buildEndpointFramePrompt(plannedRow, role, selectedProductItems, subjectBrief)
const prompt = await ensureEnglishForModel(rawPrompt)
const englishSubjectBrief = await ensureEnglishForModel(subjectBrief)
const updated = await generateSceneAsset(job.id, frame.index, {
size: SUBJECT_ASSET_SIZE,
scene_mode: "similar",
scene_style: "premium_product",
asset_role: role,
prompt: buildEndpointFramePrompt(plannedRow, role, selectedProductItems, subjectBrief),
subject_brief: subjectBrief,
prompt,
subject_brief: englishSubjectBrief,
product_images: selectedProductItems.map((item) => item.ref),
source_frame_indices: [],
})
@@ -3622,6 +3834,13 @@ function AudioStoryboardPlanPanel({
/>
<div className="flex items-center justify-end gap-2">
<ModelTrace trace={scriptRewriteModelTrace(runtimeModels)} compact />
<button
type="button"
onClick={() => setShowChineseMirror((value) => !value)}
className="inline-flex h-9 items-center justify-center rounded-md border border-white/10 bg-white/[0.045] px-2.5 text-[11px] font-semibold text-white/60 transition hover:border-white/25 hover:text-white"
>
{showChineseMirror ? "收起中文" : "显示中文"}
</button>
<button
type="button"
onClick={() => void rewriteAllRows()}
@@ -3633,7 +3852,10 @@ function AudioStoryboardPlanPanel({
</button>
<button
type="button"
onClick={() => setCopyOverrides({})}
onClick={() => {
setCopyOverrides({})
setCopyZhOverrides({})
}}
disabled={scriptRewriteBusy !== null || !Object.keys(copyOverrides).length}
className="inline-flex h-9 items-center justify-center rounded-md border border-white/10 bg-white/[0.045] px-2.5 text-[11px] font-semibold text-white/60 transition hover:border-white/25 hover:text-white disabled:cursor-not-allowed disabled:opacity-35"
>
@@ -3657,6 +3879,7 @@ function AudioStoryboardPlanPanel({
const rowVideos = videosForFrame(referenceFrame)
const savingStoryboard = storyboardSaveBusyRow === row.index
const copyText = copyForRow(row)
const copyZhText = copyZhForRow(row)
const selectedProductCount = plannedRow.needsProduct ? selectProductItemsForRow(plannedRow, productItems, "endpoint").length : 0
const endpointSubjectBrief = plannedRow.needsSubject ? subjectBriefForEndpoint(plannedRow, subjectRefs) : ""
return (
@@ -3667,12 +3890,15 @@ function AudioStoryboardPlanPanel({
<StoryboardPlanCell label="分镜">
<div className="font-mono text-[11px] text-white/40">{row.start.toFixed(1)}-{row.end.toFixed(1)}s</div>
<div className="mt-1.5 inline-flex max-w-full rounded-md border border-emerald-300/15 bg-emerald-300/[0.08] px-1.5 py-0.5 text-[10px] leading-tight text-emerald-100/80">
{row.role}
{ROLE_LABELS_ZH[row.role]}
</div>
</StoryboardPlanCell>
<StoryboardPlanCell label="原内容">
<p className="line-clamp-2 text-[10.5px] leading-snug" title={row.source}>{row.source}</p>
{showChineseMirror && row.sourceZh ? (
<p className="mt-1 line-clamp-2 text-[10px] leading-snug text-white/34" title={row.sourceZh}>{row.sourceZh}</p>
) : null}
</StoryboardPlanCell>
<StoryboardPlanCell label={`${scriptStep.no} 新口播文案`}>
@@ -3681,6 +3907,9 @@ function AudioStoryboardPlanPanel({
onChange={(event) => patchRowCopy(row.index, event.target.value)}
className="min-h-[64px] w-full resize-y rounded border border-white/10 bg-black/32 px-2 py-1.5 text-[11px] leading-snug text-white/82 outline-none placeholder:text-white/25 focus:border-cyan-300/50"
/>
{showChineseMirror && copyZhText ? (
<p className="mt-1 line-clamp-2 text-[10px] leading-snug text-white/34" title={copyZhText}>{copyZhText}</p>
) : null}
<button
type="button"
onClick={() => void rewriteSingleRow(row)}
@@ -3730,13 +3959,21 @@ function AudioStoryboardPlanPanel({
placeholder="画面规划"
className="min-h-[42px] w-full resize-y rounded border border-white/10 bg-black/32 px-2 py-1.5 text-[10.5px] leading-snug text-white/76 outline-none placeholder:text-white/25 focus:border-cyan-300/50"
/>
{showChineseMirror && plannedRow.visualPlanZh ? (
<p className="-mt-1 line-clamp-2 text-[10px] leading-snug text-white/32" title={plannedRow.visualPlanZh}>{plannedRow.visualPlanZh}</p>
) : null}
{plannedRow.needsSubject && (
<textarea
value={plannedRow.subjectDescription}
onChange={(event) => patchRowPlan(row.index, { subjectDescription: event.target.value })}
placeholder="人物描述:主体身份、姿态、情绪、需要用哪些视角"
className="min-h-[42px] w-full resize-y rounded border border-violet-300/12 bg-violet-300/[0.04] px-2 py-1.5 text-[10.5px] leading-snug text-violet-50/78 outline-none placeholder:text-white/25 focus:border-violet-300/50"
/>
<>
<textarea
value={plannedRow.subjectDescription}
onChange={(event) => patchRowPlan(row.index, { subjectDescription: event.target.value })}
placeholder="人物描述:主体身份、姿态、情绪、需要用哪些视角"
className="min-h-[42px] w-full resize-y rounded border border-violet-300/12 bg-violet-300/[0.04] px-2 py-1.5 text-[10.5px] leading-snug text-violet-50/78 outline-none placeholder:text-white/25 focus:border-violet-300/50"
/>
{showChineseMirror && plannedRow.subjectDescriptionZh ? (
<p className="-mt-1 line-clamp-2 text-[10px] leading-snug text-violet-100/34" title={plannedRow.subjectDescriptionZh}>{plannedRow.subjectDescriptionZh}</p>
) : null}
</>
)}
<div className="grid gap-1 md:grid-cols-2">
<textarea
@@ -3752,12 +3989,21 @@ function AudioStoryboardPlanPanel({
className="min-h-[48px] w-full resize-y rounded border border-cyan-300/12 bg-cyan-300/[0.04] px-2 py-1.5 text-[10.5px] leading-snug text-cyan-50/78 outline-none placeholder:text-white/25 focus:border-cyan-300/50"
/>
</div>
{showChineseMirror && (plannedRow.firstFramePlanZh || plannedRow.lastFramePlanZh) ? (
<div className="-mt-1 grid gap-1 md:grid-cols-2">
<p className="line-clamp-2 text-[10px] leading-snug text-emerald-100/34" title={plannedRow.firstFramePlanZh}>{plannedRow.firstFramePlanZh}</p>
<p className="line-clamp-2 text-[10px] leading-snug text-cyan-100/34" title={plannedRow.lastFramePlanZh}>{plannedRow.lastFramePlanZh}</p>
</div>
) : null}
<textarea
value={plannedRow.productPlacement}
onChange={(event) => patchRowPlan(row.index, { productPlacement: event.target.value })}
placeholder="产品出现方式:不出现 / 首帧出现 / 尾帧出现 / 全程佩戴 / 产品特写"
className="min-h-[38px] w-full resize-y rounded border border-white/10 bg-black/32 px-2 py-1.5 text-[10.5px] leading-snug text-white/68 outline-none placeholder:text-white/25 focus:border-rose-300/45"
/>
{showChineseMirror && plannedRow.productPlacementZh ? (
<p className="-mt-1 line-clamp-2 text-[10px] leading-snug text-white/32" title={plannedRow.productPlacementZh}>{plannedRow.productPlacementZh}</p>
) : null}
<div className="grid gap-1.5 md:grid-cols-[minmax(0,1fr)_88px_88px]">
<div className="rounded border border-white/10 bg-black/24 px-2 py-1.5 text-[10px] leading-snug text-white/42">
<div className="mb-1 flex items-center justify-between gap-2">
@@ -3802,6 +4048,8 @@ function AudioStoryboardPlanPanel({
onClick={() => patchRowPlan(row.index, {
...visualModeDefaults(plannedRow.visualMode),
subjectDescription: buildSubjectDescription(plannedRow.role, plannedRow.visualMode),
subjectDescriptionZh: buildSubjectDescriptionZh(plannedRow.role, plannedRow.visualMode),
productPlacementZh: visualModeDefaults(plannedRow.visualMode, "zh").productPlacement,
})}
className="rounded border border-white/10 px-1.5 py-0.5 text-white/42 transition hover:border-white/25 hover:text-white/72"
>

View File

@@ -59,6 +59,7 @@ export interface KeyElement {
subject_kind?: SubjectKind
subject_assets?: SubjectAsset[]
subject_consensus_brief?: string
subject_consensus_brief_zh?: string
created_at?: number
}
@@ -261,7 +262,7 @@ export async function rewriteStoryboardScript(
author_intent?: string
segments: StoryboardScriptRewriteSegment[]
},
): Promise<{ items: Array<{ index: number; text: string }> }> {
): Promise<{ items: Array<{ index: number; text: string; text_zh?: string }> }> {
const res = await fetch(`${API_BASE}/jobs/${jobId}/script/rewrite`, {
method: "POST",
headers: { "Content-Type": "application/json" },
@@ -573,6 +574,7 @@ export interface CharacterLibraryItem {
folder: string
description: string
prompt_brief?: string
prompt_brief_zh?: string
primary_image: string
images: CharacterLibraryImage[]
}
@@ -599,6 +601,7 @@ export interface SubjectTemplateItem {
description: string
note: string
prompt_brief?: string
prompt_brief_zh?: string
source: "database"
source_job_id: string
source_frame_idx: number
@@ -623,6 +626,7 @@ export interface AudioScript {
source_text: string
source_zh: string
rewritten_text: string
rewritten_text_zh?: string
speaker_profile: string
rhythm_profile: string
background_audio_profile: string