feat: enforce english prompt language strategy
This commit is contained in:
205
api/main.py
205
api/main.py
@@ -91,7 +91,7 @@ YTDLP_COOKIES_FILE = os.getenv("YTDLP_COOKIES_FILE", "").strip()
|
||||
YTDLP_COOKIES_FROM_BROWSER = os.getenv("YTDLP_COOKIES_FROM_BROWSER", "").strip()
|
||||
AUDIO_PRODUCT_BRIEF = os.getenv(
|
||||
"AUDIO_PRODUCT_BRIEF",
|
||||
"SKG 智能按摩产品,主打日常肩颈、腰背、眼部、膝盖或足部放松;广告表达要高级、干净、可信,不做医疗疗效承诺。",
|
||||
"SKG smart massage products for everyday neck-and-shoulder, back, eye, knee, or foot relaxation. Ads should feel premium, clean, trustworthy, and must not make medical efficacy claims.",
|
||||
).strip()
|
||||
AUDIO_REWRITE_MODEL = gpt_model_env("AUDIO_REWRITE_MODEL", REWRITE_MODEL)
|
||||
VOICE_PROVIDER = "azure_openai"
|
||||
@@ -454,6 +454,7 @@ class CharacterLibraryItem(BaseModel):
|
||||
folder: str = ""
|
||||
description: str = ""
|
||||
prompt_brief: str = ""
|
||||
prompt_brief_zh: str = ""
|
||||
primary_image: str = ""
|
||||
images: list[CharacterLibraryImage] = Field(default_factory=list)
|
||||
|
||||
@@ -480,6 +481,7 @@ class SubjectTemplateItem(BaseModel):
|
||||
description: str = ""
|
||||
note: str = ""
|
||||
prompt_brief: str = ""
|
||||
prompt_brief_zh: str = ""
|
||||
source: Literal["database"] = "database"
|
||||
source_job_id: str = ""
|
||||
source_frame_idx: int = -1
|
||||
@@ -534,6 +536,7 @@ class KeyElement(BaseModel):
|
||||
subject_kind: SubjectKind = "object"
|
||||
subject_assets: list[SubjectAsset] = Field(default_factory=list)
|
||||
subject_consensus_brief: str = ""
|
||||
subject_consensus_brief_zh: str = ""
|
||||
created_at: float = 0.0
|
||||
|
||||
|
||||
@@ -565,6 +568,7 @@ class AudioScript(BaseModel):
|
||||
source_text: str = ""
|
||||
source_zh: str = ""
|
||||
rewritten_text: str = ""
|
||||
rewritten_text_zh: str = ""
|
||||
speaker_profile: str = ""
|
||||
rhythm_profile: str = ""
|
||||
background_audio_profile: str = ""
|
||||
@@ -2307,7 +2311,7 @@ def _audio_profile_model_sync(wav: Path, segments: list[TranscriptSegment], targ
|
||||
fallback = _fallback_audio_profile(segments, target_seconds)
|
||||
if not LLM_API_KEY or not wav.exists():
|
||||
return fallback
|
||||
transcript = _transcript_join(segments, "en") or _transcript_join(segments, "zh") or "No reliable transcript."
|
||||
transcript = _ensure_english(_transcript_join(segments, "en") or _transcript_join(segments, "zh") or "No reliable transcript.")
|
||||
try:
|
||||
audio_b64 = base64.b64encode(wav.read_bytes()).decode("ascii")
|
||||
except Exception:
|
||||
@@ -2373,12 +2377,15 @@ def _build_audio_intake_sync(job_id: str, wav: Path, segments: list[TranscriptSe
|
||||
)
|
||||
|
||||
|
||||
def _rewrite_audio_script_sync(segments: list[TranscriptSegment], target_seconds: float = 12.0) -> tuple[str, str]:
|
||||
def _rewrite_audio_script_sync(segments: list[TranscriptSegment], target_seconds: float = 12.0) -> tuple[str, str, str]:
|
||||
fallback = _fallback_audio_script(segments, target_seconds)
|
||||
try:
|
||||
fallback_zh = _translate_text_sync(fallback, "zh", max_tokens=300) if LLM_API_KEY else ""
|
||||
except Exception:
|
||||
fallback_zh = ""
|
||||
if not LLM_API_KEY:
|
||||
return fallback, "LLM_API_KEY 未配置,使用本地 SKG 模板"
|
||||
return fallback, fallback_zh, "LLM_API_KEY 未配置,使用本地 SKG 模板"
|
||||
source_text = _transcript_join(segments, "en")
|
||||
source_zh = _transcript_join(segments, "zh")
|
||||
min_words, max_words = _voiceover_target_words(target_seconds)
|
||||
prompt = (
|
||||
"You are an English short-video voice-over writer for SKG wellness massagers. "
|
||||
@@ -2392,10 +2399,9 @@ def _rewrite_audio_script_sync(segments: list[TranscriptSegment], target_seconds
|
||||
"5. Introduce SKG products directly: smart massage, warmth, rhythm, daily neck/back/eye/knee/foot relaxation.\n"
|
||||
"6. Keep it easy for TTS: short sentences, spoken phrasing, no hashtags, no stage directions, no quotation marks.\n"
|
||||
"7. If the source transcript is thin, ignore it and write a general SKG product intro.\n"
|
||||
'Return strict JSON only: {"rewritten_text":"..."}.\n\n'
|
||||
f"SKG product context: {AUDIO_PRODUCT_BRIEF}\n\n"
|
||||
f"English transcript:\n{source_text or 'None'}\n\n"
|
||||
f"Chinese translation for reference:\n{source_zh or 'None'}"
|
||||
'Return strict JSON only: {"rewritten_text":"English VO","rewritten_text_zh":"Simplified Chinese mirror for team review"}.\n\n'
|
||||
f"SKG product context: {_ensure_english(AUDIO_PRODUCT_BRIEF)}\n\n"
|
||||
f"English transcript:\n{source_text or 'None'}"
|
||||
)
|
||||
try:
|
||||
resp = llm().chat.completions.create(
|
||||
@@ -2415,9 +2421,12 @@ def _rewrite_audio_script_sync(segments: list[TranscriptSegment], target_seconds
|
||||
raw = match.group(0) if match else raw
|
||||
data = json.loads(raw)
|
||||
text = str(data.get("rewritten_text", "")).strip()
|
||||
return (text or fallback), ""
|
||||
text_zh = str(data.get("rewritten_text_zh", "")).strip()
|
||||
if text and not text_zh:
|
||||
text_zh = _translate_text_sync(text, "zh", max_tokens=300)
|
||||
return (text or fallback), (text_zh or fallback_zh), ""
|
||||
except Exception as e:
|
||||
return fallback, f"改写失败,使用本地模板:{e}"
|
||||
return fallback, fallback_zh, f"改写失败,使用本地模板:{e}"
|
||||
|
||||
|
||||
def _choose_azure_voice_id() -> str:
|
||||
@@ -2521,7 +2530,7 @@ def _build_audio_script_sync(job_id: str, segments: list[TranscriptSegment], tar
|
||||
source_text = _transcript_join(segments, "en")
|
||||
source_zh = _transcript_join(segments, "zh")
|
||||
duration = max(float(target_seconds or 0), _segment_duration(segments), 4.0)
|
||||
rewritten, rewrite_error = _rewrite_audio_script_sync(segments, duration)
|
||||
rewritten, rewritten_zh, rewrite_error = _rewrite_audio_script_sync(segments, duration)
|
||||
selected_voice_id = _choose_tts_voice_id()
|
||||
speaker_profile, rhythm_profile = _audio_delivery_profile(segments, duration, selected_voice_id)
|
||||
voice_url = ""
|
||||
@@ -2539,6 +2548,7 @@ def _build_audio_script_sync(job_id: str, segments: list[TranscriptSegment], tar
|
||||
source_text=source_text,
|
||||
source_zh=source_zh,
|
||||
rewritten_text=rewritten,
|
||||
rewritten_text_zh=rewritten_zh,
|
||||
speaker_profile=speaker_profile,
|
||||
rhythm_profile=rhythm_profile,
|
||||
product_brief=AUDIO_PRODUCT_BRIEF,
|
||||
@@ -3055,6 +3065,55 @@ class RewriteStoryboardScriptReq(BaseModel):
|
||||
segments: list[ScriptRewriteSegmentReq] = Field(default_factory=list)
|
||||
|
||||
|
||||
_TRANSLATION_CACHE: dict[str, str] = {}
|
||||
|
||||
|
||||
def _contains_cjk(text: str) -> bool:
|
||||
return bool(re.search(r"[\u3400-\u9fff]", text or ""))
|
||||
|
||||
|
||||
def _translate_text_sync(text: str, target: Literal["en", "zh"] = "en", *, max_tokens: int = 700) -> str:
|
||||
text = (text or "").strip()
|
||||
if not text or not LLM_API_KEY:
|
||||
return text
|
||||
target_label = "English" if target == "en" else "Simplified Chinese"
|
||||
prompt = (
|
||||
f"Translate the following TikTok ad planning text into concise natural {target_label}. "
|
||||
"Preserve concrete product, camera, subject, timing, and structure details. "
|
||||
"Do not add commentary, markdown, quotes, or explanations.\n\n"
|
||||
f"Input:\n{text}"
|
||||
)
|
||||
resp = llm().chat.completions.create(
|
||||
model=TRANSLATE_MODEL,
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
temperature=0.15,
|
||||
max_tokens=max_tokens,
|
||||
)
|
||||
out = (resp.choices[0].message.content or "").strip()
|
||||
if not out:
|
||||
rc = getattr(resp.choices[0].message, "reasoning_content", "") or ""
|
||||
if rc:
|
||||
out = rc.strip().splitlines()[-1].strip()
|
||||
return re.sub(r'^[\'"「『]+|[\'"」』]+$', "", out).strip() or text
|
||||
|
||||
|
||||
def _ensure_english(text: str) -> str:
|
||||
text = (text or "").strip()
|
||||
if not text or not _contains_cjk(text):
|
||||
return text
|
||||
key = hashlib.sha256(("en\0" + text).encode("utf-8")).hexdigest()
|
||||
cached = _TRANSLATION_CACHE.get(key)
|
||||
if cached:
|
||||
return cached
|
||||
try:
|
||||
translated = _translate_text_sync(text, "en", max_tokens=max(700, min(3500, len(text) // 2 + 900)))
|
||||
_TRANSLATION_CACHE[key] = translated
|
||||
return translated
|
||||
except Exception as e:
|
||||
print(f"[ensure english fallback] {e}", flush=True)
|
||||
return text
|
||||
|
||||
|
||||
@app.post("/translate")
|
||||
def translate_text(req: TranslateReq) -> dict:
|
||||
"""单条文本翻译(给生图自定义提取元素 zh→en 用)"""
|
||||
@@ -3092,22 +3151,26 @@ def translate_text(req: TranslateReq) -> dict:
|
||||
|
||||
def _fallback_script_rewrite_item(segment: ScriptRewriteSegmentReq, author_intent: str = "") -> dict:
    """Build a local-template English voice-over line for one storyboard segment.

    Used when no model rewrite is available for the segment.

    Args:
        segment: Request segment; ``source``, ``role`` and ``index`` are read.
        author_intent: Optional creator note. It is passed through
            ``_ensure_english`` before being quoted in the line.

    Returns:
        ``{"index", "text", "text_zh"}`` where ``text`` is the English line
        capped at 260 characters and ``text_zh`` is its Chinese mirror, or
        ``""`` when ``LLM_API_KEY`` is unset or the translation raises.
    """
    source = (segment.source or "").strip()
    intent = _ensure_english(author_intent or "")

    # Role labels used before roles became English ids. Mapping them keeps
    # older payloads on their matching template instead of silently falling
    # back to "bridge".
    legacy_roles = {
        "开场钩子": "hook",
        "痛点推进": "pain",
        "利益证明": "proof",
        "方案过渡": "solution",
        "转化收口": "cta",
        "节奏承接": "bridge",
    }
    role = segment.role or ""
    role = legacy_roles.get(role, role)

    templates = {
        "hook": "Have you noticed that after hours of looking down, your neck and shoulders complain before you do?",
        "pain": "Phone scrolling, desk work, and commuting can keep your neck and shoulders tight all day.",
        "proof": "An SKG wearable massager sits around the neck and shoulders, bringing warm, rhythmic comfort to the spots that feel tense.",
        "solution": "This beat can simply show pick up, wear, fit, and relax, so the product enters a normal daily routine.",
        "cta": "If you want neck-and-shoulder relaxation to become a daily habit, start with this SKG massager.",
        "bridge": "Follow the source rhythm, but land this line in one specific neck-and-shoulder use moment.",
    }
    rewritten = templates.get(role, templates["bridge"])
    if source and role not in {"hook", "cta"}:
        rewritten = f"{rewritten} Keep the source sentence rhythm, but replace the content with SKG wearing and relaxation experience."
    if intent:
        rewritten = f"{rewritten} Adjust the tone based on the creator note: {intent[:90]}."

    # Cap the length BEFORE translating so the Chinese mirror describes
    # exactly the English text that is returned, not a longer draft.
    rewritten = rewritten[:260]
    try:
        zh = _translate_text_sync(rewritten, "zh", max_tokens=260) if LLM_API_KEY else ""
    except Exception:
        # The mirror is best-effort review text; the English line is still returned.
        zh = ""
    return {"index": segment.index, "text": rewritten, "text_zh": zh}
|
||||
|
||||
|
||||
def _parse_script_rewrite_items(raw: str, requested: list[ScriptRewriteSegmentReq], author_intent: str = "") -> list[dict]:
|
||||
@@ -3123,7 +3186,7 @@ def _parse_script_rewrite_items(raw: str, requested: list[ScriptRewriteSegmentRe
|
||||
raw_items = data.get("items") if isinstance(data, dict) else data
|
||||
if not isinstance(raw_items, list):
|
||||
raw_items = []
|
||||
by_index: dict[int, str] = {}
|
||||
by_index: dict[int, tuple[str, str]] = {}
|
||||
for item in raw_items:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
@@ -3132,19 +3195,27 @@ def _parse_script_rewrite_items(raw: str, requested: list[ScriptRewriteSegmentRe
|
||||
except Exception:
|
||||
continue
|
||||
value = str(item.get("text") or item.get("rewritten_text") or "").strip()
|
||||
value_zh = str(item.get("text_zh") or item.get("rewritten_text_zh") or "").strip()
|
||||
if value:
|
||||
by_index[idx] = re.sub(r"\s+", " ", value).strip()[:260]
|
||||
return [
|
||||
{"index": segment.index, "text": by_index.get(segment.index) or _fallback_script_rewrite_item(segment, author_intent)["text"]}
|
||||
for segment in requested
|
||||
]
|
||||
by_index[idx] = (re.sub(r"\s+", " ", value).strip()[:260], re.sub(r"\s+", " ", value_zh).strip()[:260])
|
||||
items = []
|
||||
for segment in requested:
|
||||
fallback = _fallback_script_rewrite_item(segment, author_intent)
|
||||
text, text_zh = by_index.get(segment.index, ("", ""))
|
||||
if text and not text_zh:
|
||||
try:
|
||||
text_zh = _translate_text_sync(text, "zh", max_tokens=260) if LLM_API_KEY else ""
|
||||
except Exception:
|
||||
text_zh = ""
|
||||
items.append({"index": segment.index, "text": text or fallback["text"], "text_zh": text_zh or fallback.get("text_zh", "")})
|
||||
return items
|
||||
|
||||
|
||||
def _rewrite_storyboard_script_sync(req: RewriteStoryboardScriptReq) -> list[dict]:
|
||||
segments = [segment for segment in req.segments if (segment.source or segment.current_text).strip()]
|
||||
if not segments:
|
||||
return []
|
||||
author_intent = (req.author_intent or "").strip()
|
||||
author_intent = _ensure_english(req.author_intent or "")
|
||||
if not LLM_API_KEY:
|
||||
return [_fallback_script_rewrite_item(segment, author_intent) for segment in segments]
|
||||
payload = [
|
||||
@@ -3152,26 +3223,27 @@ def _rewrite_storyboard_script_sync(req: RewriteStoryboardScriptReq) -> list[dic
|
||||
"index": segment.index,
|
||||
"time": f"{segment.start:.1f}-{segment.end:.1f}s",
|
||||
"role": segment.role,
|
||||
"source_reference": segment.source,
|
||||
"current_voiceover": segment.current_text,
|
||||
"source_reference": _ensure_english(segment.source),
|
||||
"current_voiceover": _ensure_english(segment.current_text),
|
||||
}
|
||||
for segment in segments
|
||||
]
|
||||
prompt = (
|
||||
"你是信息流广告脚本文案改写师。任务:基于原参考文案的节奏和信息结构,把每段改写成 SKG 挂脖肩颈按摩仪的新口播文案。\n"
|
||||
"硬规则:\n"
|
||||
"1. 输出中文短视频口播,不要英文,不要舞台说明,不要引号。\n"
|
||||
"2. 不逐字翻译原文,不保留原品牌、价格、优惠码、平台话术;只参考节奏、钩子、痛点、转化结构。\n"
|
||||
"3. 产品固定为套在脖子上的 U 形肩颈按摩仪,表达肩颈紧绷、久坐低头、热敷感、揉按感、佩戴放松和日常使用场景。\n"
|
||||
"4. 避免医疗疗效、治疗、治愈、止痛等强功效承诺。\n"
|
||||
"5. 每段尽量短,适配该段时间;保持自然创作者口吻。\n"
|
||||
"6. mode=all 时,整片要前后连贯;mode=segment 时,只改给定段落但仍要贴合上下文风格。\n"
|
||||
f"作者想法:{author_intent or '没有额外想法,按原片节奏改成自然卖点口播。'}\n"
|
||||
f"改写模式:{req.mode}\n"
|
||||
f"SKG 产品背景:{AUDIO_PRODUCT_BRIEF}\n\n"
|
||||
"输入段落 JSON:\n"
|
||||
"You are an information-feed ad voice-over rewrite specialist. Rewrite each segment into a new ENGLISH SKG neck-and-shoulder massager voice-over line while preserving the source rhythm and information structure.\n"
|
||||
"Hard rules:\n"
|
||||
"1. The main text field must be English short-video VO. No stage directions, no quotes.\n"
|
||||
"2. Do not translate word-for-word. Do not keep the original brand, price, discount code, platform CTA, or exact claims; only reuse rhythm, hook, pain-point, proof, and conversion structure.\n"
|
||||
"3. The product is a U-shaped neck-and-shoulder wearable massager worn around the neck. Express neck/shoulder tension, desk posture, looking down, warmth, kneading-like comfort, wearing, relaxation, and daily use.\n"
|
||||
"4. Avoid medical treatment, cure, pain elimination, clinical, or disease claims.\n"
|
||||
"5. Keep each segment short enough for its time range and natural for a creator voice.\n"
|
||||
"6. If mode=all, make the whole piece coherent; if mode=segment, rewrite only the given segment while matching the broader style.\n"
|
||||
"7. Also return a Simplified Chinese mirror for team review in text_zh; it is not for model prompts.\n"
|
||||
f"Creator note: {author_intent or 'No extra note; follow the source pacing and turn it into natural SKG product VO.'}\n"
|
||||
f"Rewrite mode: {req.mode}\n"
|
||||
f"SKG product context: {_ensure_english(AUDIO_PRODUCT_BRIEF)}\n\n"
|
||||
"Input segments JSON:\n"
|
||||
+ json.dumps(payload, ensure_ascii=False)
|
||||
+ '\n\n只输出严格 JSON:{"items":[{"index":0,"text":"改写后的中文口播"}]}'
|
||||
+ '\n\nReturn strict JSON only: {"items":[{"index":0,"text":"rewritten English VO","text_zh":"中文镜像"}]}'
|
||||
)
|
||||
models = []
|
||||
for model in [AUDIO_REWRITE_MODEL, ASR_FALLBACK_MODEL, TRANSLATE_MODEL]:
|
||||
@@ -3182,7 +3254,7 @@ def _rewrite_storyboard_script_sync(req: RewriteStoryboardScriptReq) -> list[dic
|
||||
resp = llm().chat.completions.create(
|
||||
model=model,
|
||||
messages=[
|
||||
{"role": "system", "content": "只返回合法 JSON,不要 markdown,不要解释。"},
|
||||
{"role": "system", "content": "Return valid JSON only. No markdown. No explanation."},
|
||||
{"role": "user", "content": prompt},
|
||||
],
|
||||
response_format={"type": "json_object"},
|
||||
@@ -3950,6 +4022,7 @@ class UpdateElementReq(BaseModel):
|
||||
name_en: str | None = None
|
||||
position: str | None = None
|
||||
subject_consensus_brief: str | None = None
|
||||
subject_consensus_brief_zh: str | None = None
|
||||
|
||||
|
||||
class GenerateSceneAssetReq(BaseModel):
|
||||
@@ -3998,8 +4071,8 @@ class GenerateSubjectAssetsReq(BaseModel):
|
||||
def _subject_profile_prompt_clause(profile: SubjectProfilePreference | None) -> str:
|
||||
if not profile:
|
||||
return ""
|
||||
prompt_summary = (profile.prompt_summary or "").strip()
|
||||
resolved_summary = (profile.resolved_summary or "").strip()
|
||||
prompt_summary = _ensure_english(profile.prompt_summary or "")
|
||||
resolved_summary = _ensure_english(profile.resolved_summary or "")
|
||||
if prompt_summary:
|
||||
body = prompt_summary[:1400]
|
||||
else:
|
||||
@@ -4013,7 +4086,7 @@ def _subject_profile_prompt_clause(profile: SubjectProfilePreference | None) ->
|
||||
("hair style", profile.hair),
|
||||
("commercial mood", profile.mood),
|
||||
]
|
||||
body = "; ".join(f"{name}: {value.strip()}" for name, value in parts if value and value.strip())[:1400]
|
||||
body = "; ".join(f"{name}: {_ensure_english(value.strip())}" for name, value in parts if value and value.strip())[:1400]
|
||||
if not body and not resolved_summary:
|
||||
return ""
|
||||
mode = "random-composed" if profile.mode == "random" else "manually selected"
|
||||
@@ -4125,7 +4198,9 @@ def update_element(job_id: str, idx: int, element_id: str, req: UpdateElementReq
|
||||
if req.position is not None:
|
||||
e.position = req.position.strip()
|
||||
if req.subject_consensus_brief is not None:
|
||||
e.subject_consensus_brief = req.subject_consensus_brief.strip()[:2200]
|
||||
e.subject_consensus_brief = _ensure_english(req.subject_consensus_brief.strip())[:2200]
|
||||
if req.subject_consensus_brief_zh is not None:
|
||||
e.subject_consensus_brief_zh = req.subject_consensus_brief_zh.strip()[:2200]
|
||||
new_frames.append(f)
|
||||
if not found:
|
||||
raise HTTPException(404, "element not found")
|
||||
@@ -4208,7 +4283,7 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
|
||||
if confirmed_subjects
|
||||
else "Remove the main foreground subject from the frame if present. "
|
||||
)
|
||||
subject_brief = req.subject_brief.strip()
|
||||
subject_brief = _ensure_english(req.subject_brief.strip())
|
||||
subject_brief_clause = (
|
||||
f"Subject identity (text only, no image reference): {subject_brief[:1800]}. "
|
||||
"Maintain this identity across this and other endpoint frames in the same storyboard. "
|
||||
@@ -4237,7 +4312,7 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J
|
||||
"warm_lifestyle": "Use a warm lifestyle style: realistic lived-in details, soft natural light, approachable atmosphere.",
|
||||
"cinematic": "Use a cinematic style: dramatic but natural lighting, richer depth, filmic contrast, not fantasy.",
|
||||
}[req.scene_style]
|
||||
user_prompt = req.prompt.strip()
|
||||
user_prompt = _ensure_english(req.prompt.strip())
|
||||
user_prompt_clause = (
|
||||
"User scene direction: " + user_prompt[:1200] + " "
|
||||
if user_prompt
|
||||
@@ -4483,6 +4558,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
|
||||
brief = template.prompt_brief.strip() or template.note.strip() or template.description.strip()
|
||||
if similar_mode and not brief:
|
||||
brief = _describe_subject_template_from_images(template.name, template.subject_style, template_paths, template.note)
|
||||
brief = _ensure_english(brief)
|
||||
selected_template_brief = brief.strip()
|
||||
template_brief_clause = (
|
||||
f"Reference character brief from saved database template '{template.name}': {brief}. "
|
||||
@@ -4496,6 +4572,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
|
||||
character_label = character.name
|
||||
character_reference_paths.extend(character_library_file(image.filename) for image in character.images[:7])
|
||||
brief = character.prompt_brief.strip() or character.description.strip()
|
||||
brief = _ensure_english(brief)
|
||||
selected_template_brief = brief.strip()
|
||||
template_brief_clause = (
|
||||
f"Reference character brief from built-in creative character '{character.name}': {brief}. "
|
||||
@@ -4558,7 +4635,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
|
||||
if req.reconstruction_mode == "similar"
|
||||
else "Preserve identity, proportions, silhouette, material, colors, styling, and distinctive details across all generated views. "
|
||||
)
|
||||
prompt_extra = req.prompt.strip()
|
||||
prompt_extra = _ensure_english(req.prompt.strip())
|
||||
prompt_extra_clause = f"User direction: {prompt_extra[:1200]} " if prompt_extra else ""
|
||||
subject_profile_clause = _subject_profile_prompt_clause(req.subject_profile)
|
||||
identity_lock_clause = (
|
||||
@@ -4709,7 +4786,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
|
||||
]
|
||||
fallback_brief = " ".join(part.strip() for part in fallback_parts if part and part.strip())[:1800]
|
||||
if selected_template_brief:
|
||||
e.subject_consensus_brief = selected_template_brief[:1800]
|
||||
e.subject_consensus_brief = _ensure_english(selected_template_brief)[:1800]
|
||||
else:
|
||||
asset_paths = [
|
||||
job_dir(job_id) / "assets" / f"{asset.id}.jpg"
|
||||
@@ -4722,9 +4799,14 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat
|
||||
asset_paths,
|
||||
fallback_brief,
|
||||
)
|
||||
e.subject_consensus_brief = brief or current_brief or fallback_brief or (
|
||||
e.subject_consensus_brief = _ensure_english(brief or current_brief or fallback_brief or (
|
||||
"Generated SKG ad subject; identity brief unavailable. Keep one consistent commercial subject with clear neck and shoulder placement area."
|
||||
)
|
||||
))[:1800]
|
||||
if e.subject_consensus_brief and not e.subject_consensus_brief_zh:
|
||||
try:
|
||||
e.subject_consensus_brief_zh = _translate_text_sync(e.subject_consensus_brief, "zh", max_tokens=500)[:1800]
|
||||
except Exception:
|
||||
e.subject_consensus_brief_zh = ""
|
||||
new_frames.append(f)
|
||||
if generation_errors:
|
||||
msg = f"主体资产包部分生成完成 · {el.name_zh} · {len(generated)} 张,失败 {len(generation_errors)} 张"
|
||||
@@ -5296,18 +5378,23 @@ def save_subject_template(job_id: str, req: SaveSubjectTemplateReq) -> SubjectTe
|
||||
raise HTTPException(404, "subject asset files missing")
|
||||
|
||||
primary = next((image.id for image in images if image.view == "front"), images[0].id)
|
||||
prompt_brief = _describe_subject_template_from_images(
|
||||
prompt_brief = _ensure_english(_describe_subject_template_from_images(
|
||||
name,
|
||||
req.subject_style,
|
||||
saved_image_paths,
|
||||
req.note.strip(),
|
||||
) or req.note.strip()
|
||||
) or req.note.strip())
|
||||
try:
|
||||
prompt_brief_zh = _translate_text_sync(prompt_brief, "zh", max_tokens=500) if prompt_brief else ""
|
||||
except Exception:
|
||||
prompt_brief_zh = ""
|
||||
item = SubjectTemplateItem(
|
||||
id=template_id,
|
||||
name=name,
|
||||
description=req.note.strip(),
|
||||
note=req.note.strip(),
|
||||
prompt_brief=prompt_brief,
|
||||
prompt_brief_zh=prompt_brief_zh,
|
||||
source_job_id=job_id,
|
||||
source_frame_idx=frame.index,
|
||||
source_element_id=element.id,
|
||||
|
||||
@@ -663,7 +663,7 @@ api/main.py
|
||||
</div>
|
||||
<div class="flow-row">
|
||||
<div><strong>你看到的区域</strong><span>信息流复刻分镜工作台</span></div>
|
||||
<div><strong>主要源码</strong><span><code>AudioStoryboardPlanPanel</code>、<code>ProductReferenceCard</code>、<code>MissingProductViewSlot</code>、<code>buildAudioStoryboardRows</code>、<code>selectProductItemsForRow</code>、<code>subjectAssetRefsForPlanning</code>、<code>subjectBriefForEndpoint</code>、<code>endpointAssetRef</code>、<code>buildEndpointFramePrompt</code>、<code>buildStoryboardSceneFromAudioRow</code>、<code>generateEndpointFrameForRow</code>、<code>saveRowStoryboardDraft</code>、<code>saveAllStoryboardDrafts</code>、<code>EndpointFrameSlot</code>、<code>StoryboardVideoSlots</code> in <code>web/components/ad-recreation-board.tsx</code>;产品图、首尾帧和视频候选缩略图统一复用 <code>MediaAssetTile</code>,包括顶层 hover 放大和删除入口。产品白底图上传复用 <code>uploadStoryboardAsset</code>,视角自动识别调用 <code>analyzeProductViews</code>,缺角度自动补图调用 <code>generateProductAngleAsset</code>。当前单条/批量按钮只保存规划;首尾帧按钮调用 <code>generateSceneAsset</code>,传 <code>subject_brief</code> 和端点选择后的 1-2 张 <code>product_images</code>,不再传主体图或 contact sheet,再用 <code>PUT /frames/{idx}/storyboard</code> 保存 asset 首尾帧引用;首尾帧删除只移除本条规划中的引用,避免继续误用旧资产。<code>web/app/page.tsx</code> 的视频提交回调有暂停保护,旧入口误触也不会请求 <code>/storyboard/video</code>。</span></div>
|
||||
<div><strong>主要源码</strong><span><code>AudioStoryboardPlanPanel</code>、<code>ProductReferenceCard</code>、<code>MissingProductViewSlot</code>、<code>buildAudioStoryboardRows</code>、<code>selectProductItemsForRow</code>、<code>subjectAssetRefsForPlanning</code>、<code>subjectBriefForEndpoint</code>、<code>endpointAssetRef</code>、<code>buildEndpointFramePrompt</code>、<code>buildStoryboardSceneFromAudioRow</code>、<code>generateEndpointFrameForRow</code>、<code>saveRowStoryboardDraft</code>、<code>saveAllStoryboardDrafts</code>、<code>EndpointFrameSlot</code>、<code>StoryboardVideoSlots</code> in <code>web/components/ad-recreation-board.tsx</code>;产品图、首尾帧和视频候选缩略图统一复用 <code>MediaAssetTile</code>,包括顶层 hover 放大和删除入口。产品白底图上传复用 <code>uploadStoryboardAsset</code>,视角自动识别调用 <code>analyzeProductViews</code>,缺角度自动补图调用 <code>generateProductAngleAsset</code>。当前单条/批量按钮只保存规划;首尾帧按钮调用 <code>generateSceneAsset</code>,传 <code>subject_brief</code> 和端点选择后的 1-2 张 <code>product_images</code>,不再传主体图或 contact sheet,再用 <code>PUT /frames/{idx}/storyboard</code> 保存 asset 首尾帧引用;首尾帧删除只移除本条规划中的引用,避免继续误用旧资产。语言策略由 <code>AudioStoryboardRow</code> 的英文主字段 + <code>*Zh</code> 镜像字段承载:<code>role</code> 内部是 <code>hook/pain/proof/solution/cta/bridge</code>,<code>buildEndpointFramePrompt</code> 和 <code>StoryboardScene</code> 主值默认英文,中文只用于团队阅读;首尾帧提交前前端 <code>translateText</code> 兜底,后端 <code>_ensure_english</code> 再兜底。<code>web/app/page.tsx</code> 的视频提交回调有暂停保护,旧入口误触也不会请求 <code>/storyboard/video</code>。</span></div>
|
||||
<div><strong>适合怎么描述</strong><span>“按音频逐句生成产品分镜、每行怎样改写口播、哪几句不需要产品或人物、首帧/尾帧该怎么停、首尾帧是否已经生成并准确、产品素材池识别/补图后的备注是否准确、哪些分镜后续才值得进入单条视频候选”。</span></div>
|
||||
</div>
|
||||
<div class="flow-row">
|
||||
@@ -737,17 +737,19 @@ api/main.py
|
||||
cutout_id,
|
||||
subject_kind: object | living,
|
||||
subject_assets: SubjectAsset[],
|
||||
subject_consensus_brief
|
||||
subject_consensus_brief,
|
||||
subject_consensus_brief_zh
|
||||
}</pre>
|
||||
</div>
|
||||
<div class="card">
|
||||
<h3>AudioScript</h3>
|
||||
<p>第一步音频解析的结构化产物。<code>pipeline_transcribe</code> 提取 <code>audio.wav</code> 后先保存原始转写、中文翻译、讲话人画像、口播节奏和背景音乐/环境声/音效分析。<code>rewritten_text</code>、<code>voice_url</code> 等字段仍保留给后续新配音阶段,当前第一步不默认写入。</p>
|
||||
<p>第一步音频解析的结构化产物。<code>pipeline_transcribe</code> 提取 <code>audio.wav</code> 后先保存原始英文转写、中文翻译、讲话人画像、口播节奏和背景音乐/环境声/音效分析。<code>rewritten_text</code> 是英文新口播,<code>rewritten_text_zh</code> 只作为团队审稿镜像;<code>voice_url</code> 等字段仍保留给后续新配音阶段。</p>
|
||||
<pre>AudioScript {
|
||||
status: idle | rewriting | completed | failed,
|
||||
source_text,
|
||||
source_zh,
|
||||
rewritten_text,
|
||||
rewritten_text_zh,
|
||||
speaker_profile,
|
||||
rhythm_profile,
|
||||
background_audio_profile,
|
||||
@@ -777,10 +779,11 @@ SubjectAsset {
|
||||
width, height, size,
|
||||
source_frame_indices[]
|
||||
}</pre>
|
||||
<p><code>SubjectTemplateItem</code> 保存用户确认过的主体视图包。<code>prompt_brief</code> 是后端从模板图反推的文字特征,后续相似生成优先读取它,而不是再次把模板图作为强参考图传给 image-edit。</p>
|
||||
<p><code>SubjectTemplateItem</code> 保存用户确认过的主体视图包。<code>prompt_brief</code> 是后端从模板图反推的英文文字特征,后续相似生成优先读取它,而不是再次把模板图作为强参考图传给 image-edit;<code>prompt_brief_zh</code> 仅用于模板库卡片和团队阅读。</p>
|
||||
<pre>SubjectTemplateItem {
|
||||
id, name, description, note,
|
||||
prompt_brief,
|
||||
prompt_brief_zh,
|
||||
subject_style: transparent_human | source_actor,
|
||||
primary_image,
|
||||
images: SubjectTemplateImage[]
|
||||
@@ -907,7 +910,7 @@ ProductRefStateItem {
|
||||
<tr><td>删除输入视频</td><td><code>DELETE /jobs/{id}</code></td><td><code>deleteJob</code></td><td>从任务队列、URL 和磁盘 <code>jobs/<id></code> 目录移除整个 job,包括源视频、关键帧、元素提取图和生成视频。</td></tr>
|
||||
<tr><td>解析视频</td><td><code>POST /jobs/{id}/analyze?frames=&target=&mode=&quality=</code></td><td><code>analyzeJob</code></td><td>抽参考帧能力。当前开始流程会在视频下载完成后自动调用一次,默认 <code>frames=12</code>、<code>target=motion</code>、<code>quality=accurate</code>、<code>mode=replace</code>,形成全局动作/节奏参考帧池;原版视频旁的“抽参考 12 帧”也会用同一参数显式重跑。<code>target</code> 仍支持透明骨架人、综合、清晰主体、转场变化、表情瞬间、动作峰值。</td></tr>
|
||||
<tr><td>音频文案轨</td><td><code>POST /jobs/{id}/transcribe</code></td><td><code>triggerTranscribe</code></td><td>若尚未拆轨,先从 <code>source.mp4</code> 提取 <code>audio.wav</code> 并回填 <code>source_audio_url</code>;随后用 ASR 提取原始文案,翻译成中文,写入 <code>audio_script.source_text</code>、<code>source_zh</code> 和逐句 <code>transcript</code>。远端 <code>ASR_MODEL</code> 失败后先走本机 <code>LOCAL_ASR_BIN</code>/<code>LOCAL_ASR_MODEL</code>(默认 <code>mlx_whisper</code>),再尝试 <code>ASR_FALLBACK_MODEL</code>。后端会拒绝重复文本、逐秒假字幕或覆盖率过低的结果,不再把不可听的多模态输出写进时间轴。中文翻译由 <code>TRANSLATE_MODEL</code> 按 ASR 段落补齐,失败时保留原文时间轴且中文可为空。再用 <code>ASR_FALLBACK_MODEL</code> 读取 <code>audio.wav</code> 和已有转写时间轴,多模态音频分析讲话人、语速节奏、停顿、背景音乐/环境声/音效,写入 <code>speaker_profile</code>、<code>rhythm_profile</code>、<code>background_audio_profile</code>;若模型分析失败,则用转写段落、时长和语速做本地估算兜底。当前第一步不默认生成 SKG 新口播和 Azure OpenAI 配音。</td></tr>
|
||||
<tr><td>分镜脚本改写</td><td><code>POST /jobs/{id}/script/rewrite</code></td><td><code>rewriteStoryboardScript</code></td><td>根据原参考文案、当前新口播、分镜角色、时间段和作者想法改写中文口播。<code>mode=segment</code> 只改一段;<code>mode=all</code> 一次改完整片,要求整片前后连贯。后端按 <code>AUDIO_REWRITE_MODEL</code>、<code>ASR_FALLBACK_MODEL</code>、<code>TRANSLATE_MODEL</code> 依次尝试,全部失败时用本地模板保留可编辑文案。接口只返回 <code>items[index,text]</code>,前端暂存在当前页面状态里,点击保存规划后写入 <code>StoryboardScene.action</code>。</td></tr>
|
||||
<tr><td>分镜脚本改写</td><td><code>POST /jobs/{id}/script/rewrite</code></td><td><code>rewriteStoryboardScript</code></td><td>根据原英文参考文案、当前英文新口播、英文 role enum、时间段和作者想法改写英文口播;作者想法若含中文,后端会先经 <code>_ensure_english</code> 兜底翻译。<code>mode=segment</code> 只改一段;<code>mode=all</code> 一次改完整片,要求整片前后连贯。后端按 <code>AUDIO_REWRITE_MODEL</code>、<code>ASR_FALLBACK_MODEL</code>、<code>TRANSLATE_MODEL</code> 依次尝试,全部失败时用英文本地模板保留可编辑文案。接口返回 <code>items[index,text,text_zh]</code>,其中 <code>text</code> 是写入模型链路的英文主值,<code>text_zh</code> 只供团队审稿镜像显示;点击保存规划后写入 <code>StoryboardScene.action</code>。</td></tr>
|
||||
<tr><td>原始音频文件</td><td><code>GET /jobs/{id}/audio.wav</code></td><td><code>sourceAudioUrl</code></td><td>返回拆轨得到的 wav;当前主界面不再渲染底部吸附音频条,右侧复刻工作表会读取该文件生成参考图式横向响度波形,并和原视频、逐句时间轴联动;波形标题栏显示当前播放秒数、总时长和鼠标指针停点秒数。</td></tr>
|
||||
<tr><td>改写配音文件</td><td><code>GET /jobs/{id}/audio-script.mp3</code></td><td><code>apiAssetUrl(job.audio_script.voice_url)</code></td><td>后续新配音阶段保留的 TTS 产物;服务端固定走 <code>VOICE_PROVIDER=azure_openai</code>,通过 <code>AZURE_OPENAI_BASE_URL</code> 的 OpenAI 协议生成 mp3,并按 <code>AZURE_TTS_PATHS</code> 依次尝试 <code>/audio/speech</code>、<code>/v1/audio/speech</code> 等路径。当前第一步不默认生成该文件。</td></tr>
|
||||
<tr><td>手动加帧</td><td><code>POST /jobs/{id}/frames?t=</code></td><td><code>addManualFrame</code></td><td>按视频时间戳抽一帧,index 递增但 frames 按 timestamp 排序。当前主界面会把原版视频播放器的播放秒数传给 <code>AudioIntakePanel</code> 标题栏右侧的“当前点抽帧”。</td></tr>
|
||||
@@ -1035,6 +1038,19 @@ ProductRefStateItem {
|
||||
<h2>变更记录</h2>
|
||||
<p>这个记录不是 git log 的替代品。它记录“产品理解发生了什么变化、影响了哪些源码、你以后描述需求时该怎么说”。后续每次改功能都要补一条。</p>
|
||||
<div class="changelog">
|
||||
<article class="change">
|
||||
<header>
|
||||
<h3>2026-05-18 · 模型 prompt 语言策略切到英文主值</h3>
|
||||
<span class="tag cyan">Prompt</span>
|
||||
<span class="tag violet">UI</span>
|
||||
<span class="tag blue">API</span>
|
||||
</header>
|
||||
<div class="body">
|
||||
<p><strong>问题:</strong>最终产物是英文 TikTok 二创广告,但前端默认分镜规划、首尾帧 prompt 和用户可编辑规划字段里混入中文,导致 <code>gpt-image-2</code>、脚本改写和后续视频模型收到中英混杂指令。</p>
|
||||
<p><strong>改动:</strong><code>AudioStoryboardRow.role</code> 改为 <code>hook/pain/proof/solution/cta/bridge</code> 英文枚举,UI 仍显示中文角色标签。<code>buildAudioStoryboardRows</code>、<code>buildVisualPlan</code>、<code>buildFirstFramePlan</code>、<code>buildLastFramePlan</code>、<code>buildSubjectDescription</code>、<code>buildEndpointFramePrompt</code> 和 <code>buildStoryboardSceneFromAudioRow</code> 的模型主字段改为英文,并新增 <code>*Zh</code> 镜像用于团队阅读。首尾帧提交前前端会对含中文 prompt 调 <code>translateText</code> 兜底,后端新增 <code>_ensure_english</code> 并挂到 <code>generate_scene_asset</code>、<code>generate_subject_assets</code>、脚本改写和音频分析入口。</p>
|
||||
<p><strong>影响:</strong>发给 LLM / 生图 / 视频模型的主 prompt 默认全英文;中文只作为团队审稿镜像、UI 标签和 toast。<code>AudioScript</code> 新增 <code>rewritten_text_zh</code>,<code>KeyElement</code> 新增 <code>subject_consensus_brief_zh</code>,<code>CharacterLibraryItem</code> 和 <code>SubjectTemplateItem</code> 均新增 <code>prompt_brief_zh</code>。<code>POST /jobs/{id}/script/rewrite</code> 返回 <code>text</code> 英文主值和 <code>text_zh</code> 中文镜像。</p>
|
||||
</div>
|
||||
</article>
|
||||
<article class="change">
|
||||
<header>
|
||||
<h3>2026-05-18 · 首尾帧改为主体 brief + 产品少量硬参考</h3>
|
||||
|
||||
@@ -54,6 +54,7 @@ import {
|
||||
updateElement,
|
||||
updateStoryboard,
|
||||
uploadStoryboardAsset,
|
||||
translateText,
|
||||
videoUrl,
|
||||
} from "@/lib/api"
|
||||
import { type NodeData } from "@/components/nodes"
|
||||
@@ -83,6 +84,7 @@ const VIDEO_MODELS = [
|
||||
|
||||
type VideoModel = (typeof VIDEO_MODELS)[number]["value"]
|
||||
type BoardThemeMode = "dark" | "light"
|
||||
/** Closed set of narrative roles that can be assigned to one audio storyboard row. */
type AudioStoryboardRole =
  | "hook"
  | "pain"
  | "proof"
  | "solution"
  | "cta"
  | "bridge"
|
||||
|
||||
const BOARD_THEME_STORAGE_KEY = "skg-board-theme"
|
||||
|
||||
@@ -103,19 +105,28 @@ type AudioStoryboardRow = {
|
||||
start: number
|
||||
end: number
|
||||
source: string
|
||||
role: string
|
||||
sourceZh: string
|
||||
role: AudioStoryboardRole
|
||||
visualMode: StoryboardVisualMode
|
||||
needsProduct: boolean
|
||||
needsSubject: boolean
|
||||
subjectDescription: string
|
||||
subjectDescriptionZh: string
|
||||
skgCopy: string
|
||||
skgCopyZh: string
|
||||
visualPlan: string
|
||||
visualPlanZh: string
|
||||
firstFramePlan: string
|
||||
firstFramePlanZh: string
|
||||
lastFramePlan: string
|
||||
lastFramePlanZh: string
|
||||
referencePlan: string
|
||||
keyElements: string
|
||||
keyElementsZh: string
|
||||
productIntegration: string
|
||||
productIntegrationZh: string
|
||||
productPlacement: string
|
||||
productPlacementZh: string
|
||||
}
|
||||
|
||||
type ProductRefItem = ProductRefStateItem
|
||||
@@ -136,7 +147,7 @@ type ResolvedSubjectProfile = {
|
||||
payload: SubjectProfilePreference
|
||||
}
|
||||
type StoryboardVisualMode = NonNullable<StoryboardScene["visual_mode"]>
|
||||
type RowPlanPatch = Partial<Pick<AudioStoryboardRow, "visualMode" | "needsProduct" | "needsSubject" | "subjectDescription" | "visualPlan" | "firstFramePlan" | "lastFramePlan" | "productIntegration" | "productPlacement">>
|
||||
/**
 * Optional overrides for the plan fields of an audio storyboard row.
 * Each English plan field is listed together with its `*Zh` mirror.
 */
type RowPlanPatch = Partial<
  Pick<
    AudioStoryboardRow,
    | "visualMode"
    | "needsProduct"
    | "needsSubject"
    | "subjectDescription"
    | "subjectDescriptionZh"
    | "visualPlan"
    | "visualPlanZh"
    | "firstFramePlan"
    | "firstFramePlanZh"
    | "lastFramePlan"
    | "lastFramePlanZh"
    | "productIntegration"
    | "productIntegrationZh"
    | "productPlacement"
    | "productPlacementZh"
  >
>
|
||||
type WorkflowStepId = "input" | "source" | "audio" | "visual" | "subject" | "product" | "script" | "scene" | "video"
|
||||
type WorkflowStepStatus = "blocked" | "pending" | "running" | "ready" | "paused"
|
||||
type WorkflowStep = {
|
||||
@@ -324,6 +335,52 @@ const PRODUCT_USE_TAG_LABELS: Record<string, string> = {
|
||||
material_texture: "材质",
|
||||
}
|
||||
|
||||
/** Chinese label for every storyboard role token. */
const ROLE_LABELS_ZH: Record<AudioStoryboardRole, string> = {
  "hook": "开场钩子",
  "pain": "痛点推进",
  "proof": "利益证明",
  "solution": "方案过渡",
  "cta": "转化收口",
  "bridge": "节奏承接",
}
|
||||
|
||||
/** English phrase for every storyboard role token. */
const ROLE_LABELS_EN: Record<AudioStoryboardRole, string> = {
  "hook": "hook",
  "pain": "pain build",
  "proof": "benefit proof",
  "solution": "solution transition",
  "cta": "conversion close",
  "bridge": "rhythm bridge",
}
|
||||
|
||||
/** English wording for each product reference view key, for use in prompt text. */
const PRODUCT_VIEW_PROMPT_LABELS: Record<string, string> = {
  "front": "front / outer shell",
  "left_45": "wearer's left 45-degree view",
  "right_45": "wearer's right 45-degree view",
  "side_thickness": "side thickness view",
  "inner_contacts": "inner neck-contact pads",
  "back_bottom": "back / bottom structure",
}
|
||||
|
||||
/** English wording for each product reference background key, for use in prompt text. */
const PRODUCT_BACKGROUND_PROMPT_LABELS: Record<string, string> = {
  "white": "white background",
  "black": "black background",
  "simple": "simple solid background",
  "complex": "complex background",
  "unknown": "unknown background",
}
|
||||
|
||||
/** English wording for each product reference use-tag key, for use in prompt text. */
const PRODUCT_USE_TAG_PROMPT_LABELS: Record<string, string> = {
  "hero_packshot": "hero packshot",
  "wearing_scale": "wearing scale",
  "inner_contact": "inner contact pads",
  "side_thickness": "side thickness",
  "asymmetry": "left-right asymmetry",
  "button_detail": "button detail",
  "back_bottom": "back/bottom structure",
  "material_texture": "material texture",
}
|
||||
|
||||
const controlClass =
|
||||
"h-10 rounded-md border border-white/10 bg-black/55 px-3 text-[12px] text-white outline-none transition focus:border-cyan-300/60 disabled:cursor-not-allowed disabled:opacity-40"
|
||||
|
||||
@@ -352,6 +409,20 @@ function shortId(id?: string | null) {
|
||||
return id ? id.slice(0, 8) : "-"
|
||||
}
|
||||
|
||||
/**
 * Report whether `text` contains any CJK character.
 *
 * The previous class `[\u3400-\u9fff]` matched Han ideographs only, so text made of
 * kana, Hangul, CJK punctuation (for example the `、` list separator or `。`), or
 * full-width forms was reported as "no CJK". The ranges below are a strict superset
 * of the old one:
 *   - U+3000–U+9FFF  CJK symbols/punctuation, kana, bopomofo, CJK compatibility,
 *                    Extension A and the unified ideographs
 *   - U+AC00–U+D7AF  Hangul syllables
 *   - U+F900–U+FAFF  CJK compatibility ideographs
 *   - U+FF00–U+FFEF  half-width and full-width forms
 *   - surrogate pairs starting at U+D840–U+D88F, i.e. supplementary-plane ideographs
 *
 * @param text String to inspect.
 * @returns `true` when at least one character falls in the ranges above.
 */
function containsCjk(text: string) {
  // No `u` flag: the surrogate-pair alternative matches UTF-16 code units directly,
  // so the pattern does not depend on Unicode property escape support.
  return /[\u3000-\u9fff\uac00-\ud7af\uf900-\ufaff\uff00-\uffef]|[\ud840-\ud88f][\udc00-\udfff]/.test(text)
}
|
||||
|
||||
/**
 * Best-effort conversion of a prompt to English before it is sent to a model.
 *
 * Text that is empty after trimming, or that `containsCjk` reports as free of CJK
 * characters, is returned trimmed without any translation call. Otherwise the text
 * is passed to `translateText(..., "en")`.
 *
 * @param text Raw prompt text.
 * @returns The trimmed translation, or the trimmed original when no translation is
 *   needed, the translation call throws, or it yields an empty result.
 */
async function ensureEnglishForModel(text: string) {
  const trimmed = text.trim()
  if (!trimmed || !containsCjk(trimmed)) return trimmed
  try {
    const translated = await translateText(trimmed, "en")
    // An empty or whitespace-only translation would silently blank the prompt;
    // keep the caller's original wording instead, as the catch path already does.
    return translated?.trim() || trimmed
  } catch {
    // Deliberate best effort: a failed translation must not block the request.
    return trimmed
  }
}
|
||||
|
||||
function subjectProfileOption(category: SubjectProfileCategory, value: string) {
|
||||
return category.options.find((option) => option.value === value) ?? category.options[0]
|
||||
}
|
||||
@@ -373,6 +444,16 @@ function resolveSubjectProfile(
|
||||
const values = { ...DEFAULT_SUBJECT_PROFILE_DRAFT }
|
||||
const labelParts: string[] = []
|
||||
const promptParts: string[] = []
|
||||
const promptLabelByKey: Record<SubjectProfileFieldKey, string> = {
|
||||
gender: "gender presentation",
|
||||
age: "age range",
|
||||
wardrobe: "wardrobe style",
|
||||
region_ethnicity: "regional or ethnic appearance cues",
|
||||
skin_tone: "skin tone",
|
||||
body: "body proportion",
|
||||
hair: "hair style",
|
||||
mood: "commercial mood",
|
||||
}
|
||||
for (const category of SUBJECT_PROFILE_CATEGORIES) {
|
||||
const rawValue = draft[category.key] || "random"
|
||||
let option = subjectProfileOption(category, rawValue)
|
||||
@@ -382,7 +463,7 @@ function resolveSubjectProfile(
|
||||
}
|
||||
values[category.key] = option.value
|
||||
labelParts.push(`${category.label}:${option.label}`)
|
||||
promptParts.push(`${category.label}: ${option.prompt}`)
|
||||
promptParts.push(`${promptLabelByKey[category.key]}: ${option.prompt}`)
|
||||
}
|
||||
const summary = labelParts.join(" / ")
|
||||
const promptSummary = promptParts.join("; ")
|
||||
@@ -393,14 +474,14 @@ function resolveSubjectProfile(
|
||||
promptSummary,
|
||||
payload: {
|
||||
mode,
|
||||
gender: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[0], values.gender).label,
|
||||
age: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[1], values.age).label,
|
||||
wardrobe: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[2], values.wardrobe).label,
|
||||
region_ethnicity: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[3], values.region_ethnicity).label,
|
||||
skin_tone: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[4], values.skin_tone).label,
|
||||
body: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[5], values.body).label,
|
||||
hair: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[6], values.hair).label,
|
||||
mood: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[7], values.mood).label,
|
||||
gender: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[0], values.gender).prompt,
|
||||
age: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[1], values.age).prompt,
|
||||
wardrobe: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[2], values.wardrobe).prompt,
|
||||
region_ethnicity: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[3], values.region_ethnicity).prompt,
|
||||
skin_tone: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[4], values.skin_tone).prompt,
|
||||
body: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[5], values.body).prompt,
|
||||
hair: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[6], values.hair).prompt,
|
||||
mood: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[7], values.mood).prompt,
|
||||
resolved_summary: summary,
|
||||
prompt_summary: promptSummary,
|
||||
},
|
||||
@@ -828,161 +909,254 @@ function buildFallbackScene(job: Job, frame: KeyFrame, order: number): Storyboar
|
||||
const duration = Math.max(3.5, Math.min(7.5, Math.max(job.duration || 0, frames.length * 5) / Math.max(frames.length, 1)))
|
||||
const audio = job.audio_script?.rewritten_text?.trim()
|
||||
|| job.transcript?.slice(0, 4).map((item) => item.en || item.zh).filter(Boolean).join(" ")
|
||||
|| "按原音频说话节奏改写为 SKG 产品介绍。"
|
||||
|| "Rewrite the original audio pacing into a new SKG product introduction."
|
||||
const objects = frame.description?.objects?.slice(0, 5).map((item) => item.name).filter(Boolean).join("、")
|
||||
return {
|
||||
duration: Number(duration.toFixed(1)),
|
||||
first_image: null,
|
||||
last_image: null,
|
||||
subject: objects ? `关键元素候选:${objects}` : "保留原视频最重要的主体动作和构图关系。",
|
||||
scene: `${frame.description?.scene || `按第 ${order + 1} 段音频规划 SKG 信息流广告分镜。`}\n音频节奏依据:${audio.slice(0, 220)}`,
|
||||
product: "把原素材里的产品/痛点转成 SKG 颈部/肩颈按摩仪表达,默认使用 SKG 四张产品角度图做产品真源。",
|
||||
subject: objects ? `Key element candidates: ${objects}` : "Keep the source video's most important subject motion and composition relationship.",
|
||||
scene: `${frame.description?.scene || `Plan SKG information-feed ad scene ${order + 1} from the audio segment.`}\nAudio pacing reference: ${audio.slice(0, 220)}`,
|
||||
product: "Convert the source product or pain-point context into SKG neck-and-shoulder massager expression. Use the uploaded SKG product angles as product truth.",
|
||||
action: frame.description?.style
|
||||
? `沿用原画面的讲话节奏、动作节点和 ${frame.description.style},突出使用前紧绷、使用后放松。`
|
||||
: "沿用原视频的讲话节奏和动作节点,突出使用前紧绷、使用后放松。",
|
||||
? `Keep the source speaking rhythm, action beats, and ${frame.description.style}; show tension before use and relaxed comfort after use.`
|
||||
: "Keep the source speaking rhythm and action beats; show tension before use and relaxed comfort after use.",
|
||||
reference_ids: [],
|
||||
}
|
||||
}
|
||||
|
||||
function classifyAudioRole(text: string, index: number, total: number) {
|
||||
function classifyAudioRole(text: string, index: number, total: number): AudioStoryboardRole {
|
||||
const lower = text.toLowerCase()
|
||||
if (index === 0) return "开场钩子"
|
||||
if (index >= total - 2 || /discount|code|shipping|link|limited|sold out|grab|recommend|tiktok/.test(lower)) return "转化收口"
|
||||
if (/can't|dont|don't|if |when |tired|stress|pain|crave|bloated|puffy|ready/.test(lower)) return "痛点推进"
|
||||
if (/help|can |reduce|improve|relax|lower|stabilize|clear|less/.test(lower)) return "利益证明"
|
||||
if (/use|try|apple|product|bottle|one month/.test(lower)) return "方案过渡"
|
||||
return "节奏承接"
|
||||
if (index === 0) return "hook"
|
||||
if (index >= total - 2 || /discount|code|shipping|link|limited|sold out|grab|recommend|tiktok/.test(lower)) return "cta"
|
||||
if (/can't|dont|don't|if |when |tired|stress|pain|crave|bloated|puffy|ready/.test(lower)) return "pain"
|
||||
if (/help|can |reduce|improve|relax|lower|stabilize|clear|less/.test(lower)) return "proof"
|
||||
if (/use|try|apple|product|bottle|one month/.test(lower)) return "solution"
|
||||
return "bridge"
|
||||
}
|
||||
|
||||
function buildSkgCopy(role: string, index: number) {
|
||||
const variants: Record<string, string[]> = {
|
||||
"开场钩子": [
|
||||
/**
 * Pick a default English SKG voice-over line for a storyboard row.
 *
 * @param role Narrative role of the row.
 * @param index Row position; it selects among the lines for that role by modulo,
 *   so consecutive rows alternate between them.
 * @returns One of the lines for `role`, or one of the `bridge` lines when the role
 *   has no entry.
 */
function buildSkgCopy(role: AudioStoryboardRole, index: number) {
  const copyByRole: Record<AudioStoryboardRole, string[]> = {
    hook: [
      "If you spend hours looking down at your phone or working at a desk, your neck and shoulders may already be carrying that tension.",
      "A few hours on screens can make your neck and shoulders feel tired faster than you expect.",
    ],
    pain: [
      "That tight neck, heavy shoulder feeling, and uncomfortable head lift are signs you should not wait to deal with it.",
      "Commuting, desk work, parenting, and phone scrolling can keep your neck and shoulders tense all day.",
    ],
    proof: [
      "The SKG neck-and-shoulder massager sits around the back of your neck and shoulders, bringing warmth and kneading-like comfort right where you feel tight.",
      "Wear it hands-free between work, at home, or before bed to settle into a calmer relaxation rhythm.",
    ],
    solution: [
      "This beat turns the source explanation into a clear SKG routine: pick it up, wear it, adjust the fit, and relax.",
      "Let the product enter naturally, and show the change from neck tension to a more relaxed state.",
    ],
    cta: [
      "If you want neck-and-shoulder relaxation to become part of your daily routine, this SKG massager is an easy place to start.",
      "Close with a clear product detail and a relaxed expression so viewers know exactly what to try next.",
    ],
    bridge: [
      "Keep the source video's short, fast rhythm, but anchor each line in a specific neck-and-shoulder moment or product action.",
      "Use this line as a bridge from the pain point into the SKG routine without slowing the pace.",
    ],
  }
  const candidates = copyByRole[role] ?? copyByRole.bridge
  const slot = index % candidates.length
  return candidates[slot]
}
|
||||
|
||||
function buildSkgCopyZh(role: AudioStoryboardRole, index: number) {
|
||||
const variants: Record<AudioStoryboardRole, string[]> = {
|
||||
hook: [
|
||||
"如果你也经常低头刷手机、久坐办公,肩颈紧绷可能已经在悄悄影响状态。",
|
||||
"每天盯屏几个小时,脖子和肩膀的疲惫会比你想得更早出现。",
|
||||
],
|
||||
"痛点推进": [
|
||||
pain: [
|
||||
"脖子发紧、肩膀沉、抬头不舒服,不一定要等到很难受才处理。",
|
||||
"通勤、办公、带娃、刷手机叠在一起,肩颈很容易一直处在紧绷状态。",
|
||||
],
|
||||
"利益证明": [
|
||||
proof: [
|
||||
"SKG 颈部按摩仪贴合后颈和肩颈两侧,把热敷感和揉按感带到真正紧的位置。",
|
||||
"戴上后不用占手,工作间隙、居家放松、睡前都能快速进入舒缓节奏。",
|
||||
],
|
||||
"方案过渡": [
|
||||
solution: [
|
||||
"这一镜把原片的讲解节奏换成 SKG 使用步骤:拿起、佩戴、贴合、放松。",
|
||||
"让产品自然进入画面,重点不是硬推,而是把肩颈紧绷到放松的变化拍清楚。",
|
||||
],
|
||||
"转化收口": [
|
||||
cta: [
|
||||
"如果你也想把肩颈放松变成日常习惯,可以先从这台 SKG 开始。",
|
||||
"最后用清晰产品特写和轻松状态收住,让用户知道现在就可以入手。",
|
||||
],
|
||||
"节奏承接": [
|
||||
bridge: [
|
||||
"延续原片短句快节奏,把每一句都落到一个具体肩颈场景或产品动作。",
|
||||
"这一句作为过渡,画面从痛点切到产品,让节奏继续往下走。",
|
||||
],
|
||||
}
|
||||
const list = variants[role] ?? variants["节奏承接"]
|
||||
const list = variants[role] ?? variants.bridge
|
||||
return list[index % list.length]
|
||||
}
|
||||
|
||||
function buildVisualPlan(role: string) {
|
||||
if (role === "开场钩子") return "竖屏近景口播开场,人物轻揉脖子或转动肩颈,直接建立疲惫感。"
|
||||
if (role === "痛点推进") return "沿用原视频的表情、手势和节奏,画面强调低头、久坐、肩颈紧绷。"
|
||||
if (role === "利益证明") return "产品进入画面并佩戴到后颈,切到肩颈贴合、按键、热敷/揉按感的细节。"
|
||||
if (role === "转化收口") return "产品清晰特写 + 人物放松表情收尾,保留信息流广告的快速行动感。"
|
||||
/**
 * Default English visual plan for a storyboard row.
 *
 * @param role Narrative role of the row.
 * @returns The plan text for `hook`, `pain`, `proof` or `cta`; every other role
 *   gets the generic plan.
 */
function buildVisualPlan(role: AudioStoryboardRole) {
  switch (role) {
    case "hook":
      return "Vertical close-up creator opening. The subject gently rubs the neck or rotates the shoulders to establish fatigue immediately."
    case "pain":
      return "Keep the source expression, gesture rhythm, and fast pacing while emphasizing phone posture, desk sitting, and neck-and-shoulder tension."
    case "proof":
      return "Bring the product into frame and place it around the back of the neck, then cut to fit, button, warmth, and kneading-comfort details."
    case "cta":
      return "End with a clean product detail plus a relaxed expression, keeping the quick action feeling of a feed ad."
    default:
      return "Keep the source-style composition and camera movement, but replace the content with an SKG neck-and-shoulder relaxation scene."
  }
}
|
||||
|
||||
/**
 * Chinese counterpart of the default visual plan for a storyboard row.
 *
 * @param role Narrative role of the row.
 * @returns The Chinese plan text for `hook`, `pain`, `proof` or `cta`; every other
 *   role gets the generic plan.
 */
function buildVisualPlanZh(role: AudioStoryboardRole) {
  const planByRole = new Map<AudioStoryboardRole, string>([
    ["hook", "竖屏近景口播开场,人物轻揉脖子或转动肩颈,直接建立疲惫感。"],
    ["pain", "沿用原视频的表情、手势和节奏,画面强调低头、久坐、肩颈紧绷。"],
    ["proof", "产品进入画面并佩戴到后颈,切到肩颈贴合、按键、热敷/揉按感的细节。"],
    ["cta", "产品清晰特写 + 人物放松表情收尾,保留信息流广告的快速行动感。"],
  ])
  return planByRole.get(role) ?? "保持原片同类构图和运镜,把画面内容替换成 SKG 肩颈放松场景。"
}
|
||||
|
||||
function visualModeDefaults(mode: StoryboardVisualMode) {
|
||||
function visualModeDefaults(mode: StoryboardVisualMode, language: "en" | "zh" = "en") {
|
||||
if (mode === "person_only") {
|
||||
return {
|
||||
needsProduct: false,
|
||||
needsSubject: true,
|
||||
productPlacement: "本条不出现产品,只用人物状态、痛点或口播承接节奏;不要硬插 SKG 产品。",
|
||||
productPlacement: language === "zh"
|
||||
? "本条不出现产品,只用人物状态、痛点或口播承接节奏;不要硬插 SKG 产品。"
|
||||
: "Do not show the product in this beat. Use the subject's state, pain point, or voice-over performance to carry the rhythm; do not force in the SKG product.",
|
||||
}
|
||||
}
|
||||
if (mode === "product_only") {
|
||||
return {
|
||||
needsProduct: true,
|
||||
needsSubject: false,
|
||||
productPlacement: "只展示 SKG 肩颈按摩仪本体、佩戴角度或功能细节;不要强行加入人物。",
|
||||
productPlacement: language === "zh"
|
||||
? "只展示 SKG 肩颈按摩仪本体、佩戴角度或功能细节;不要强行加入人物。"
|
||||
: "Show only the SKG neck-and-shoulder massager, wearing angle, or functional detail; do not force a main character into this beat.",
|
||||
}
|
||||
}
|
||||
if (mode === "environment") {
|
||||
return {
|
||||
needsProduct: false,
|
||||
needsSubject: false,
|
||||
productPlacement: "本条作为场景/情绪/节奏过渡,不出现产品和人物主体;只保留空间、光线和运动节奏。",
|
||||
productPlacement: language === "zh"
|
||||
? "本条作为场景/情绪/节奏过渡,不出现产品和人物主体;只保留空间、光线和运动节奏。"
|
||||
: "Use this beat as a scene, mood, or pacing transition. Do not show the product or main subject; keep only space, light, and motion rhythm.",
|
||||
}
|
||||
}
|
||||
return {
|
||||
needsProduct: true,
|
||||
needsSubject: true,
|
||||
productPlacement: "SKG 肩颈按摩仪作为外置佩戴产品出现,围绕拿起、佩戴、调整、按键或放松状态展开。",
|
||||
productPlacement: language === "zh"
|
||||
? "SKG 肩颈按摩仪作为外置佩戴产品出现,围绕拿起、佩戴、调整、按键或放松状态展开。"
|
||||
: "Show the SKG neck-and-shoulder massager as an external wearable product, built around picking it up, wearing it, adjusting it, pressing controls, or relaxing with it.",
|
||||
}
|
||||
}
|
||||
|
||||
function visualModeForRole(role: string): StoryboardVisualMode {
|
||||
if (role === "开场钩子" || role === "痛点推进") return "person_only"
|
||||
if (role === "转化收口") return "product_only"
|
||||
if (role === "节奏承接") return "environment"
|
||||
/**
 * Map a storyboard role to its default visual mode.
 *
 * @param role Narrative role of the row.
 * @returns `person_only` for `hook`/`pain`, `product_only` for `cta`,
 *   `environment` for `bridge`, and `person_product` for everything else.
 */
function visualModeForRole(role: AudioStoryboardRole): StoryboardVisualMode {
  switch (role) {
    case "hook":
    case "pain":
      return "person_only"
    case "cta":
      return "product_only"
    case "bridge":
      return "environment"
    default:
      return "person_product"
  }
}
|
||||
|
||||
function buildFirstFramePlan(role: string) {
|
||||
if (role === "开场钩子") return "人物近景看向镜头或低头办公,手轻扶后颈,画面先不露产品。"
|
||||
if (role === "痛点推进") return "保留原片人物动作节奏,肩颈紧绷、低头、揉脖子或久坐状态明确。"
|
||||
if (role === "利益证明") return "人物拿起或准备佩戴 SKG 肩颈按摩仪,产品位置清晰但动作刚开始。"
|
||||
if (role === "方案过渡") return "人物从痛点状态切到拿起产品/靠近肩颈,准备进入使用动作。"
|
||||
if (role === "转化收口") return "产品干净特写或佩戴完成后的稳定画面,留出转化收口的视觉焦点。"
|
||||
/**
 * Default English first-frame plan for a storyboard row.
 *
 * @param role Narrative role of the row.
 * @returns The first-frame description for the role; roles without a dedicated
 *   entry get the generic "start from the source composition" plan.
 */
function buildFirstFramePlan(role: AudioStoryboardRole) {
  switch (role) {
    case "hook":
      return "Close-up subject looking at camera or working with head down, one hand lightly touching the back of the neck, with no product visible yet."
    case "pain":
      return "Preserve the source action rhythm while making neck tension, looking down, neck rubbing, or desk-sitting posture clear."
    case "proof":
      return "The subject picks up or prepares to wear the SKG neck-and-shoulder massager; product position is clear but the action has just started."
    case "solution":
      return "Move from the pain state into picking up the product or bringing it toward the neck and shoulders, ready to begin use."
    case "cta":
      return "Clean product close-up or stable worn-product frame, leaving a strong visual focus for the conversion close."
    default:
      return "Start from the current source sentence's composition to carry the rhythm without forcing a subject change."
  }
}
|
||||
|
||||
/**
 * Chinese counterpart of the default first-frame plan for a storyboard row.
 *
 * @param role Narrative role of the row.
 * @returns The Chinese first-frame description for the role; roles without a
 *   dedicated entry get the generic plan.
 */
function buildFirstFramePlanZh(role: AudioStoryboardRole) {
  const planByRole = new Map<AudioStoryboardRole, string>([
    ["hook", "人物近景看向镜头或低头办公,手轻扶后颈,画面先不露产品。"],
    ["pain", "保留原片人物动作节奏,肩颈紧绷、低头、揉脖子或久坐状态明确。"],
    ["proof", "人物拿起或准备佩戴 SKG 肩颈按摩仪,产品位置清晰但动作刚开始。"],
    ["solution", "人物从痛点状态切到拿起产品/靠近肩颈,准备进入使用动作。"],
    ["cta", "产品干净特写或佩戴完成后的稳定画面,留出转化收口的视觉焦点。"],
  ])
  return planByRole.get(role) ?? "按原视频当前句的构图启动,先承接节奏,不强行改变镜头主体。"
}
|
||||
|
||||
function buildLastFramePlan(role: string) {
|
||||
if (role === "开场钩子") return "人物抬头或表情更集中,给下一镜产品或方案进入留出空间。"
|
||||
if (role === "痛点推进") return "紧绷状态被放大到一个明确停点,准备切入产品解决方案。"
|
||||
if (role === "利益证明") return "产品已正确佩戴在后颈/肩颈位置,人物放松,产品比例稳定。"
|
||||
if (role === "方案过渡") return "产品贴合肩颈,手部调整完成,画面自然进入功能细节或放松状态。"
|
||||
if (role === "转化收口") return "产品或佩戴状态稳定收住,画面干净,适合后续接购买/行动号召。"
|
||||
/**
 * Default English last-frame plan for a storyboard row.
 *
 * @param role Narrative role of the row.
 * @returns The last-frame description for the role; roles without a dedicated
 *   entry get the generic "hold a stable endpoint" plan.
 */
function buildLastFramePlan(role: AudioStoryboardRole) {
  switch (role) {
    case "hook":
      return "The subject lifts the head or becomes more focused, leaving room for the product or solution to enter in the next beat."
    case "pain":
      return "Amplify the tense state into a clear stopping point, ready to cut into the product solution."
    case "proof":
      return "The product is correctly worn around the back of the neck and shoulders, the subject looks more relaxed, and product scale is stable."
    case "solution":
      return "The product fits against the neck and shoulders, hand adjustment is complete, and the frame can move into functional detail or relaxation."
    case "cta":
      return "Hold a stable product or worn-product frame with clean composition, ready for purchase or action-call continuation."
    default:
      return "Advance the action slightly and hold a stable endpoint that connects naturally to the next sentence."
  }
}
|
||||
|
||||
/**
 * Chinese counterpart of the default last-frame plan for a storyboard row.
 *
 * @param role Narrative role of the row.
 * @returns The Chinese last-frame description for the role; roles without a
 *   dedicated entry get the generic plan.
 */
function buildLastFramePlanZh(role: AudioStoryboardRole) {
  const planByRole = new Map<AudioStoryboardRole, string>([
    ["hook", "人物抬头或表情更集中,给下一镜产品或方案进入留出空间。"],
    ["pain", "紧绷状态被放大到一个明确停点,准备切入产品解决方案。"],
    ["proof", "产品已正确佩戴在后颈/肩颈位置,人物放松,产品比例稳定。"],
    ["solution", "产品贴合肩颈,手部调整完成,画面自然进入功能细节或放松状态。"],
    ["cta", "产品或佩戴状态稳定收住,画面干净,适合后续接购买/行动号召。"],
  ])
  return planByRole.get(role) ?? "动作小幅推进并稳定停住,保留与下一句衔接的方向感。"
}
|
||||
|
||||
function buildSubjectDescription(role: string, visualMode: StoryboardVisualMode) {
|
||||
/**
 * Default English subject description for a storyboard row.
 *
 * @param role Narrative role of the row; selects the role-specific sentence.
 * @param visualMode Visual mode of the row.
 * @returns An empty string for `product_only` and `environment` rows; otherwise the
 *   shared subject sentence followed by a space and the role-specific sentence.
 */
function buildSubjectDescription(role: AudioStoryboardRole, visualMode: StoryboardVisualMode) {
  const showsSubject = visualMode !== "product_only" && visualMode !== "environment"
  if (!showsSubject) return ""

  const base = "Consistent similar subject: a friendly transparent or semi-transparent humanoid with visible clean white skeleton inside, commercial not horror, with neck, collarbone, and upper-back areas clear for wearing a neck-and-shoulder massager."

  let detail: string
  switch (role) {
    case "hook":
      detail = "Front or upper-body creator speaking state, with a pain-point or curious expression that grabs attention quickly."
      break
    case "pain":
      detail = "Neck-and-shoulder tension, looking down, desk posture, or rubbing the neck; make the neck line, shoulders, and upper back readable."
      break
    case "proof":
      detail = "Relaxed state while wearing or about to wear the product, prioritizing neck-and-shoulder close-up, side, and back-neck angles."
      break
    case "solution":
      detail = "Hands adjust the product or show wearable fit naturally; product placement must not hide important anatomy or device structure."
      break
    case "cta":
      detail = "Stable, relaxed, clean ending state using front, three-quarter, or stable worn-product framing."
      break
    default:
      detail = "Keep one consistent subject identity, material, body type, gender presentation, and commercial mood across the whole video."
  }
  return `${base} ${detail}`
}
|
||||
|
||||
function buildSubjectDescriptionZh(role: AudioStoryboardRole, visualMode: StoryboardVisualMode) {
|
||||
if (visualMode === "product_only" || visualMode === "environment") return ""
|
||||
const base = "统一相似主体:透明或半透明皮肤包裹可见白色骨架的人形,广告感、非恐怖、肩颈/锁骨/上背区域清晰,适合佩戴肩颈按摩仪。"
|
||||
if (role === "开场钩子") return `${base} 正面或半身口播状态,表情有痛点或好奇感,能快速抓住注意。`
|
||||
if (role === "痛点推进") return `${base} 肩颈紧绷、低头久坐或按揉脖子的状态,重点看清脖子、肩线和上背。`
|
||||
if (role === "利益证明") return `${base} 产品佩戴或即将佩戴的放松状态,优先肩颈近景、侧面和后颈肩背角度。`
|
||||
if (role === "方案过渡") return `${base} 手部调整产品或展示佩戴贴合感,人物姿态自然,产品位置不能挡住关键结构。`
|
||||
if (role === "转化收口") return `${base} 状态稳定、放松、干净收尾,可用正面/三分之二视角或产品佩戴后的稳定状态。`
|
||||
if (role === "hook") return `${base} 正面或半身口播状态,表情有痛点或好奇感,能快速抓住注意。`
|
||||
if (role === "pain") return `${base} 肩颈紧绷、低头久坐或按揉脖子的状态,重点看清脖子、肩线和上背。`
|
||||
if (role === "proof") return `${base} 产品佩戴或即将佩戴的放松状态,优先肩颈近景、侧面和后颈肩背角度。`
|
||||
if (role === "solution") return `${base} 手部调整产品或展示佩戴贴合感,人物姿态自然,产品位置不能挡住关键结构。`
|
||||
if (role === "cta") return `${base} 状态稳定、放松、干净收尾,可用正面/三分之二视角或产品佩戴后的稳定状态。`
|
||||
return `${base} 保持与整片一致的主体身份、材质、体型、性别表现和广告气质。`
|
||||
}
|
||||
|
||||
function buildAudioStoryboardRows(job: Job | null): AudioStoryboardRow[] {
|
||||
if (!job?.transcript.length) return []
|
||||
return job.transcript.map((segment, index) => {
|
||||
const source = segment.zh?.trim() || segment.en?.trim() || "原音频文案待补充"
|
||||
const source = segment.en?.trim() || segment.zh?.trim() || "Source audio script pending."
|
||||
const sourceZh = segment.zh?.trim() || segment.en?.trim() || "原音频文案待补充"
|
||||
const role = classifyAudioRole(`${segment.en} ${segment.zh}`, index, job.transcript.length)
|
||||
const visualMode = visualModeForRole(role)
|
||||
const defaults = visualModeDefaults(visualMode)
|
||||
const defaultsZh = visualModeDefaults(visualMode, "zh")
|
||||
const keyElements = role === "proof"
|
||||
? "wearing action, product position, hand pressing the control, relaxed expression"
|
||||
: "creator framing, subject gesture, facial rhythm, scene lighting"
|
||||
const keyElementsZh = role === "proof"
|
||||
? "佩戴动作、产品位置、手部按键、放松表情"
|
||||
: "口播构图、人物动作、表情节奏、场景光线"
|
||||
return {
|
||||
index: segment.index,
|
||||
start: segment.start,
|
||||
end: segment.end,
|
||||
source,
|
||||
sourceZh,
|
||||
role,
|
||||
visualMode,
|
||||
needsProduct: defaults.needsProduct,
|
||||
needsSubject: defaults.needsSubject,
|
||||
subjectDescription: buildSubjectDescription(role, visualMode),
|
||||
subjectDescriptionZh: buildSubjectDescriptionZh(role, visualMode),
|
||||
skgCopy: buildSkgCopy(role, index),
|
||||
skgCopyZh: buildSkgCopyZh(role, index),
|
||||
visualPlan: buildVisualPlan(role),
|
||||
visualPlanZh: buildVisualPlanZh(role),
|
||||
firstFramePlan: buildFirstFramePlan(role),
|
||||
firstFramePlanZh: buildFirstFramePlanZh(role),
|
||||
lastFramePlan: buildLastFramePlan(role),
|
||||
referencePlan: `从原视频 ${segment.start.toFixed(1)}-${segment.end.toFixed(1)}s 定向抽 1-2 张参考帧。`,
|
||||
keyElements: role === "利益证明" ? "佩戴动作、产品位置、手部按键、放松表情" : "口播构图、人物动作、表情节奏、场景光线",
|
||||
productIntegration: "把原片产品/道具语境替换为 SKG 白色 U 形颈部按摩仪,产品必须外置佩戴在肩颈位置。",
|
||||
lastFramePlanZh: buildLastFramePlanZh(role),
|
||||
referencePlan: `Extract 1-2 targeted reference frames from source video ${segment.start.toFixed(1)}-${segment.end.toFixed(1)}s.`,
|
||||
keyElements,
|
||||
keyElementsZh,
|
||||
productIntegration: "Replace the source product or prop context with the SKG white U-shaped neck-and-shoulder massager. The product must be worn externally around the neck and shoulders.",
|
||||
productIntegrationZh: "把原片产品/道具语境替换为 SKG 白色 U 形颈部按摩仪,产品必须外置佩戴在肩颈位置。",
|
||||
productPlacement: defaults.productPlacement,
|
||||
productPlacementZh: defaultsZh.productPlacement,
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -1173,14 +1347,14 @@ function productReferenceNotes(items: ProductRefItem[]) {
|
||||
if (!items.length) return ""
|
||||
return items
|
||||
.map((item, index) => {
|
||||
const tags = item.useTags.map((tag) => PRODUCT_USE_TAG_LABELS[tag]).filter(Boolean).join("/")
|
||||
const tags = item.useTags.map((tag) => PRODUCT_USE_TAG_PROMPT_LABELS[tag] ?? tag).filter(Boolean).join(", ")
|
||||
const orientation = formatProductOrientation(item.orientation)
|
||||
const direction = orientation ? `;方向:${orientation}` : ""
|
||||
const landmarks = item.landmarks.length ? `;结构:${item.landmarks.join("/")}` : ""
|
||||
const risk = item.risk ? `;风险:${item.risk}` : ""
|
||||
return `${index + 1}. ${productViewLabel(item.view)}|${productBackgroundLabel(item.background)}|${tags}:${item.note || "无补充备注"}${direction}${landmarks}${risk}`
|
||||
const direction = orientation ? `; orientation: ${orientation}` : ""
|
||||
const landmarks = item.landmarks.length ? `; structural landmarks: ${item.landmarks.join(", ")}` : ""
|
||||
const risk = item.risk ? `; risk: ${item.risk}` : ""
|
||||
return `${index + 1}. ${PRODUCT_VIEW_PROMPT_LABELS[item.view] ?? item.view} | ${PRODUCT_BACKGROUND_PROMPT_LABELS[item.background] ?? item.background} | ${tags || "general product reference"}: ${item.note || "no extra note"}${direction}${landmarks}${risk}`
|
||||
})
|
||||
.join(";")
|
||||
.join("; ")
|
||||
}
|
||||
|
||||
function savedScenePatch(scene?: StoryboardScene | null): RowPlanPatch {
|
||||
@@ -1189,11 +1363,11 @@ function savedScenePatch(scene?: StoryboardScene | null): RowPlanPatch {
|
||||
visualMode: scene.visual_mode,
|
||||
needsProduct: scene.needs_product,
|
||||
needsSubject: scene.needs_subject,
|
||||
subjectDescription: scene.subject?.split("\n").find((line) => line.trim() && !line.startsWith("主体真源") && !line.startsWith("本条不需要"))?.trim(),
|
||||
visualPlan: scene.scene?.split("\n").find((line) => line.trim() && !line.startsWith("镜头类型") && !line.startsWith("首帧规划") && !line.startsWith("尾帧规划") && !line.startsWith("原音频依据"))?.trim(),
|
||||
subjectDescription: scene.subject?.split("\n").find((line) => line.trim() && !line.startsWith("Subject source") && !line.startsWith("No main subject") && !line.startsWith("主体真源") && !line.startsWith("本条不需要"))?.trim(),
|
||||
visualPlan: scene.scene?.split("\n").find((line) => line.trim() && !line.startsWith("Visual mode") && !line.startsWith("First-frame plan") && !line.startsWith("Last-frame plan") && !line.startsWith("Source audio reference") && !line.startsWith("镜头类型") && !line.startsWith("首帧规划") && !line.startsWith("尾帧规划") && !line.startsWith("原音频依据"))?.trim(),
|
||||
firstFramePlan: scene.first_frame_plan,
|
||||
lastFramePlan: scene.last_frame_plan,
|
||||
productIntegration: scene.product?.split("\n").find((line) => line.trim() && !line.startsWith("产品需求") && !line.startsWith("产品出现方式") && !line.startsWith("产品素材池") && !line.startsWith("未上传产品图") && !line.startsWith("本条规划"))?.trim(),
|
||||
productIntegration: scene.product?.split("\n").find((line) => line.trim() && !line.startsWith("Product requirement") && !line.startsWith("Product placement") && !line.startsWith("Product reference pool") && !line.startsWith("No product") && !line.startsWith("This beat") && !line.startsWith("产品需求") && !line.startsWith("产品出现方式") && !line.startsWith("产品素材池") && !line.startsWith("未上传产品图") && !line.startsWith("本条规划"))?.trim(),
|
||||
productPlacement: scene.product_placement,
|
||||
}
|
||||
}
|
||||
@@ -1206,34 +1380,40 @@ function applyPlanPatch(row: AudioStoryboardRow, patch?: RowPlanPatch): AudioSto
|
||||
needsProduct: patch.needsProduct ?? row.needsProduct,
|
||||
needsSubject: patch.needsSubject ?? row.needsSubject,
|
||||
subjectDescription: patch.subjectDescription ?? row.subjectDescription,
|
||||
subjectDescriptionZh: patch.subjectDescriptionZh ?? row.subjectDescriptionZh,
|
||||
visualPlan: patch.visualPlan ?? row.visualPlan,
|
||||
visualPlanZh: patch.visualPlanZh ?? row.visualPlanZh,
|
||||
firstFramePlan: patch.firstFramePlan ?? row.firstFramePlan,
|
||||
firstFramePlanZh: patch.firstFramePlanZh ?? row.firstFramePlanZh,
|
||||
lastFramePlan: patch.lastFramePlan ?? row.lastFramePlan,
|
||||
lastFramePlanZh: patch.lastFramePlanZh ?? row.lastFramePlanZh,
|
||||
productIntegration: patch.productIntegration ?? row.productIntegration,
|
||||
productIntegrationZh: patch.productIntegrationZh ?? row.productIntegrationZh,
|
||||
productPlacement: patch.productPlacement ?? row.productPlacement,
|
||||
productPlacementZh: patch.productPlacementZh ?? row.productPlacementZh,
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Pick the product reference views and detail tags to prefer for a storyboard row.
 *
 * Both lookup tables are keyed by the row's narrative role. A role that is
 * missing from a table falls back to that table's `bridge` entry.
 *
 * @param row Storyboard row; only `row.role` is read.
 * @returns An object with `views` and `tags`, each in the order listed in the tables below.
 */
function productPriorityForRow(row: AudioStoryboardRow) {
  // Product view keys per narrative role.
  const viewPriorityByRole: Record<AudioStoryboardRole, string[]> = {
    hook: ["front", "left_45", "right_45", "side_thickness"],
    pain: ["front", "side_thickness", "left_45", "right_45"],
    proof: ["inner_contacts", "side_thickness", "front", "left_45", "right_45", "back_bottom"],
    solution: ["front", "left_45", "right_45", "inner_contacts", "side_thickness"],
    cta: ["front", "back_bottom", "left_45", "right_45", "inner_contacts"],
    bridge: ["front", "left_45", "right_45", "side_thickness"],
  }
  // Product detail tags per narrative role.
  const tagPriorityByRole: Record<AudioStoryboardRole, string[]> = {
    hook: ["hero_packshot", "asymmetry", "side_thickness"],
    pain: ["wearing_scale", "side_thickness", "hero_packshot"],
    proof: ["inner_contact", "wearing_scale", "button_detail", "side_thickness"],
    solution: ["wearing_scale", "hero_packshot", "inner_contact"],
    cta: ["hero_packshot", "back_bottom", "asymmetry", "material_texture"],
    bridge: ["hero_packshot", "asymmetry", "side_thickness"],
  }
  return {
    // `??` keeps the lookup safe if a row carries a role that is not a table key at runtime.
    views: viewPriorityByRole[row.role] ?? viewPriorityByRole.bridge,
    tags: tagPriorityByRole[row.role] ?? tagPriorityByRole.bridge,
  }
}
|
||||
|
||||
@@ -1245,17 +1425,17 @@ function endpointProductPriority(row: AudioStoryboardRow, role?: "first_frame" |
|
||||
if (!views.includes(view)) views.push(view)
|
||||
if (tag && !tags.includes(tag)) tags.push(tag)
|
||||
}
|
||||
if (/后颈|肩背|背面|背部|后背|上背|尾帧|佩戴完成|贴合完成/.test(text)) add("back_bottom", "back_bottom")
|
||||
if (/侧面|侧身|厚度|侧厚|体积|左侧|右侧|45|调整|拿起|靠近肩颈/.test(text)) add("side_thickness", "side_thickness")
|
||||
if (/内侧|触点|按摩头|贴颈|接触|皮肤接触/.test(text)) add("inner_contacts", "inner_contact")
|
||||
if (/佩戴比例|上身|真人佩戴|脖子|肩颈|锁骨/.test(text)) add("left_45", "wearing_scale")
|
||||
if (/按键|按钮|控制|开关|logo/.test(text)) add("right_45", "button_detail")
|
||||
if (/back neck|neck back|upper back|back view|back side|shoulder blade|last frame|worn|wearing complete|fit complete|后颈|肩背|背面|背部|后背|上背|尾帧|佩戴完成|贴合完成/.test(text)) add("back_bottom", "back_bottom")
|
||||
if (/side|profile|thickness|volume|left side|right side|45|adjust|pick up|bring.*neck|toward.*shoulder|侧面|侧身|厚度|侧厚|体积|左侧|右侧|调整|拿起|靠近肩颈/.test(text)) add("side_thickness", "side_thickness")
|
||||
if (/inner|contact pad|massage head|touching skin|neck contact|skin contact|内侧|触点|按摩头|贴颈|接触|皮肤接触/.test(text)) add("inner_contacts", "inner_contact")
|
||||
if (/wearing scale|upper body|worn on human|neck|shoulder|collarbone|佩戴比例|上身|真人佩戴|脖子|肩颈|锁骨/.test(text)) add("left_45", "wearing_scale")
|
||||
if (/button|control|switch|logo|按键|按钮|控制|开关/.test(text)) add("right_45", "button_detail")
|
||||
return { views, tags }
|
||||
}
|
||||
|
||||
/**
 * Decide how many product reference images one endpoint frame may use.
 *
 * The row's visual plan, first/last frame plans, product integration and
 * product placement are concatenated with the endpoint role, lowercased, and
 * tested against a list of English and Chinese keywords.
 *
 * @param row Storyboard row supplying the planning text.
 * @param role Optional endpoint role; its literal value is appended to the tested text.
 * @returns `MAX_PRODUCT_REFS_PER_ENDPOINT` when any keyword matches, otherwise `1`.
 */
function endpointProductMaxForRow(row: AudioStoryboardRow, role?: "first_frame" | "last_frame") {
  const text = `${row.visualPlan} ${row.firstFramePlan} ${row.lastFramePlan} ${row.productIntegration} ${row.productPlacement} ${role ?? ""}`.toLowerCase()
  // NOTE: keywords are substring matches (no word boundaries), so e.g. "side" also matches inside "inside".
  const mentionsDetailKeyword = /side|profile|thickness|back neck|upper back|back view|inner|contact pad|massage head|neck contact|close-up|closeup|button|control|worn|wearing complete|侧面|侧身|厚度|侧厚|后颈|肩背|背面|背部|内侧|触点|按摩头|贴颈|特写|近景|按键|按钮|佩戴完成|上背/.test(text)
  return mentionsDetailKeyword ? MAX_PRODUCT_REFS_PER_ENDPOINT : 1
}
|
||||
@@ -1336,26 +1516,42 @@ function subjectViewRoleHint(view: string) {
|
||||
return hints[view] ?? "主体参考视角"
|
||||
}
|
||||
|
||||
/**
 * Return the English prompt hint for a subject reference view key.
 *
 * @param view Subject view key such as `"front"` or `"back_neck_detail"`.
 * @returns The hint text for a known key, or `"subject reference view"` for any other key.
 */
function subjectViewPromptHint(view: string) {
  const hints: Record<string, string> = {
    front: "front speaking shot, opening hook, expression, conversion close",
    three_quarter_left: "left three-quarter angle, talking, pre-wear motion, natural turn",
    three_quarter_right: "right three-quarter angle, talking, pre-wear motion, natural turn",
    left: "left side, neck-and-shoulder side profile, wearing action, product thickness and position",
    right: "right side, neck-and-shoulder side profile, wearing action, product thickness and position",
    back: "back view, back neck and upper shoulders, product placement landing",
    bust_front: "front neck-and-shoulder close-up, pain-point expression, wearing scale",
    bust_left_45: "left three-quarter neck-and-shoulder close-up, hand adjustment, wearable fit",
    bust_right_45: "right three-quarter neck-and-shoulder close-up, hand adjustment, wearable fit",
    back_neck_detail: "back-neck and upper-back detail, contact-pad position, product fit",
  }
  // Only own keys count: a plain object lookup would otherwise resolve inherited
  // names such as "constructor" or "toString" to Object.prototype members
  // instead of falling through to the default hint.
  const hint = Object.prototype.hasOwnProperty.call(hints, view) ? hints[view] : undefined
  return hint ?? "subject reference view"
}
|
||||
|
||||
/**
 * Return the subject description to use for a storyboard row.
 *
 * A non-blank `row.subjectDescription` is returned trimmed. Otherwise a default
 * English description is built that also lists the labels of up to four
 * subject references.
 *
 * @param row Storyboard row; only `row.subjectDescription` is read.
 * @param subjectRefs Subject references; each contributes `label`, or
 *   `subjectViewLabel(view)` when the label is empty.
 * @returns The trimmed description, or the default sentences joined by single spaces.
 */
function subjectDescriptionForRow(row: AudioStoryboardRow, subjectRefs: SubjectPlanningRef[]) {
  const trimmed = row.subjectDescription.trim()
  if (trimmed) return trimmed
  const labels = subjectRefs.slice(0, 4).map((ref) => ref.label || subjectViewLabel(ref.view)).join(", ")
  const sentences = [
    "Consistent similar subject: use the generated subject view pack as the character truth, maintaining one identity, body proportion, material, age range, gender presentation, and commercial mood.",
    labels ? `Available subject views: ${labels}.` : "",
    "If this beat needs a subject but lacks a specific description, default to a friendly transparent skin shell with visible white skeleton, non-horror, with clear neck and shoulder area for wearable product placement.",
  ]
  // English sentences need a separating space; joining with "" ran them together ("mood.Available ...").
  return sentences.filter(Boolean).join(" ")
}
|
||||
|
||||
function subjectPriorityForRow(row: AudioStoryboardRow, role?: "first_frame" | "last_frame") {
|
||||
const text = `${row.role} ${row.visualMode} ${row.subjectDescription} ${row.visualPlan} ${row.firstFramePlan} ${row.lastFramePlan} ${row.productPlacement}`.toLowerCase()
|
||||
if (/后颈|肩背|上背|背面|背部|贴合|佩戴完成|已正确佩戴/.test(text)) {
|
||||
if (/back neck|upper back|shoulder blade|back view|fit|worn|wearing complete|correctly worn|后颈|肩背|上背|背面|背部|贴合|佩戴完成|已正确佩戴/.test(text)) {
|
||||
return ["back_neck_detail", "back", "bust_left_45", "bust_right_45", "left", "right", "bust_front", "three_quarter_left", "three_quarter_right", "front"]
|
||||
}
|
||||
if (/侧面|左侧|右侧|45|调整|拿起|准备佩戴|靠近肩颈|手部/.test(text)) {
|
||||
if (/side|left|right|45|adjust|pick up|prepare to wear|toward.*neck|hand|侧面|左侧|右侧|调整|拿起|准备佩戴|靠近肩颈|手部/.test(text)) {
|
||||
return ["bust_left_45", "bust_right_45", "left", "right", "three_quarter_left", "three_quarter_right", "bust_front", "front", "back_neck_detail", "back"]
|
||||
}
|
||||
if (/近景|半身|肩颈|锁骨|脖子|揉脖子|低头|紧绷/.test(text)) {
|
||||
if (/close-up|closeup|upper-body|bust|neck|shoulder|collarbone|rubbing.*neck|looking down|tense|tension|近景|半身|肩颈|锁骨|脖子|揉脖子|低头|紧绷/.test(text)) {
|
||||
return ["bust_front", "bust_left_45", "bust_right_45", "front", "three_quarter_left", "three_quarter_right", "left", "right", "back_neck_detail", "back"]
|
||||
}
|
||||
if (role === "last_frame" && row.needsProduct) {
|
||||
@@ -1371,8 +1567,8 @@ function selectSubjectRefsForRow(row: AudioStoryboardRow, refs: SubjectPlanningR
|
||||
.map((ref, index) => {
|
||||
const rank = priority.indexOf(ref.view)
|
||||
const labelText = `${ref.label || ""} ${ref.roleHint}`.toLowerCase()
|
||||
const closeupScore = /肩颈|后颈|近景|贴合|佩戴/.test(row.visualPlan + row.firstFramePlan + row.lastFramePlan + row.productPlacement)
|
||||
&& /bust|neck|近景|肩颈|后颈/.test(`${ref.view} ${labelText}`)
|
||||
const closeupScore = /neck|shoulder|back neck|close-up|closeup|fit|wear|佩戴|肩颈|后颈|近景|贴合/.test(row.visualPlan + row.firstFramePlan + row.lastFramePlan + row.productPlacement)
|
||||
&& /bust|neck|close-up|closeup|近景|肩颈|后颈/.test(`${ref.view} ${labelText}`)
|
||||
? 12
|
||||
: 0
|
||||
return { ref, score: (rank >= 0 ? 100 - rank * 8 : 0) + closeupScore - index }
|
||||
@@ -1383,7 +1579,7 @@ function selectSubjectRefsForRow(row: AudioStoryboardRow, refs: SubjectPlanningR
|
||||
}
|
||||
|
||||
/**
 * Format subject references as a numbered, one-line list for prompt text.
 *
 * Each entry reads `N. <label> | <prompt hint>`, where the label is `ref.label`
 * or `subjectViewLabel(ref.view)` when the label is empty, and the hint comes
 * from `subjectViewPromptHint(ref.view)`. Entries are separated by "; ".
 *
 * @param refs Subject references to describe.
 * @returns The joined notes; an empty string when `refs` is empty.
 */
function subjectReferenceNotes(refs: SubjectPlanningRef[]) {
  const notes = refs.map((ref, index) => {
    const label = ref.label || subjectViewLabel(ref.view)
    return `${index + 1}. ${label} | ${subjectViewPromptHint(ref.view)}`
  })
  return notes.join("; ")
}
|
||||
|
||||
function subjectAssetRefsForPlanning(source: { frame: KeyFrame; element: KeyElement } | null): SubjectPlanningRef[] {
|
||||
@@ -1432,19 +1628,20 @@ function buildEndpointFramePrompt(row: AudioStoryboardRow, role: "first_frame" |
|
||||
const opposite = role === "first_frame" ? row.lastFramePlan : row.firstFramePlan
|
||||
const productNotes = selectedProductItems.length ? productReferenceNotes(selectedProductItems) : ""
|
||||
return [
|
||||
`分镜 ${row.index + 1} ${role === "first_frame" ? "首帧" : "尾帧"}。`,
|
||||
`新口播文案:${row.skgCopy}`,
|
||||
`镜头类型:${VISUAL_MODE_OPTIONS.find((item) => item.value === row.visualMode)?.label ?? row.visualMode}。`,
|
||||
`当前要生成的画面:${target}`,
|
||||
`另一端画面用于连续性参考:${opposite}`,
|
||||
`画面规划:${row.visualPlan}`,
|
||||
`Storyboard beat ${row.index + 1}, ${role === "first_frame" ? "first frame" : "last frame"}.`,
|
||||
`New English voice-over line: ${row.skgCopy}`,
|
||||
`Narrative role: ${ROLE_LABELS_EN[row.role]}.`,
|
||||
`Visual mode: ${row.visualMode}.`,
|
||||
`Target endpoint frame to generate now: ${target}`,
|
||||
`Opposite endpoint continuity reference: ${opposite}`,
|
||||
`Overall visual plan: ${row.visualPlan}`,
|
||||
row.needsSubject
|
||||
? `人物主体 brief:${subjectBrief || "主体 brief 暂缺,请保持一个统一的商业广告主体,肩颈区域清晰可佩戴产品。"}。主体只依据这段文字身份描述,不上传主体参考图;可以根据本镜头自由改变动作、景别、表情和环境,但不能换成另一个人设。不要回到原视频关键帧复刻人物。`
|
||||
: "本条不需要主角人物;如出现人物,只能是局部手部、背影或环境人物,不要生成透明骨架主角。",
|
||||
? `Subject identity brief: ${subjectBrief || "Subject brief is missing. Keep one unified commercial ad subject with clear neck-and-shoulder area for product placement."}. Use only this text identity brief; no subject reference image is uploaded. The subject may freely change pose, framing, expression, gesture, and environment for this shot, but must not become a different character. Do not copy the original source-video person or keyframe.`
|
||||
: "This beat does not need a main character. If people appear, they should only be partial hands, back-view background figures, or environmental figures; do not generate the transparent skeleton main subject.",
|
||||
row.needsProduct
|
||||
? `产品融入:${row.productPlacement}。${row.productIntegration}。本次只提供 ${selectedProductItems.length} 张同一 SKG 肩颈按摩仪产品硬参考;${productNotes}。产品是套在脖子上的 U 形肩颈按摩仪,必须保持真实佩戴大小、左右非对称、按键、触点、厚度和贴颈位置。`
|
||||
: "本条不露出产品,不要强行生成 SKG 产品、包装、白底图或随机商品。",
|
||||
"输出一张单独的 9:16 高清首/尾帧,不要拼图,不要字幕,不要平台 UI,不要水印。画面要能作为后续视频生成的明确起止帧。",
|
||||
? `Product integration: ${row.productPlacement}. ${row.productIntegration}. This request provides ${selectedProductItems.length} rigid reference image(s) of the same SKG neck-and-shoulder massager: ${productNotes}. The product is a U-shaped wearable device worn around the neck and shoulders. Preserve realistic wearable scale, left-right asymmetry, button placement, contact pads, side thickness, and neck-contact position.`
|
||||
: "Do not show the product in this beat. Do not force-generate an SKG product, package, white-background product image, or random merchandise.",
|
||||
"Output one single 9:16 high-definition endpoint frame. No contact sheet, no multiple views, no subtitles, no platform UI, no watermark. The image must work as a clear first/last frame for downstream video generation.",
|
||||
].join("\n")
|
||||
}
|
||||
|
||||
@@ -1462,10 +1659,10 @@ function buildStoryboardSceneFromAudioRow(
|
||||
const subjectNotes = subjectReferenceNotes(subjectRefs)
|
||||
const subjectBrief = subjectBriefForEndpoint(row, subjectRefs)
|
||||
const productGuidance = !row.needsProduct
|
||||
? "本条规划为不露出产品或不把产品作为画面主体;视频生成时不要硬插 SKG 产品、包装、白底图或错误商品。"
|
||||
? "This beat is planned without product visibility or without product as the visual subject. Do not force-insert an SKG product, package, white-background product render, or incorrect merchandise during video generation."
|
||||
: productItems.length
|
||||
? `产品素材池共有 ${productItems.length} 张,本条只选用 ${selectedProductItems.length} 张最相关参考图,不要把未选素材混入本条画面。产品硬定义:这是套在脖子上的 U 形肩颈按摩仪,不是耳机、头戴设备或护颈枕。坐标系硬规则:左/右按佩戴者身体左右,不能按图片左右;上=靠近下巴/脸/颈部上沿,下=靠近锁骨/肩部下沿;内侧=贴颈皮肤/按摩触点,外侧=外壳/按键/Logo。所选图片只作为产品结构、角度、比例和细节参考,不要照搬参考图的白底/黑底/棚拍背景。视角标注:${notes}。保留左右非对称细节,不要把两边做成镜像对称;肩颈产品大小必须贴近真实佩戴比例,不能缩成耳机,也不能放大成护颈枕。`
|
||||
: "未上传产品图时使用默认 SKG 产品图;生成前建议先建立同一产品素材池,锁定左右差异、厚度和佩戴比例。"
|
||||
? `The product pool has ${productItems.length} image(s); this beat selects only the ${selectedProductItems.length} most relevant reference image(s). Do not mix unselected assets into this shot. Rigid product definition: this is a U-shaped neck-and-shoulder wearable massager, not headphones, a headset, or a neck pillow. Coordinate rule: left/right refer to the wearer's body, not the image; top means closer to chin/face/upper neck, bottom means closer to collarbone/shoulders; inner means skin-contact side and massage pads, outer means shell/buttons/logo. Selected images are only product structure, angle, scale, and detail references; do not copy the white/black/studio background. View notes: ${notes}. Preserve left-right asymmetry; do not mirror the two sides. The shoulder-neck product size must match realistic wearing scale, not earphone-small and not neck-pillow-large.`
|
||||
: "No product images are uploaded. Use the default SKG product concept only if needed, and preferably establish a same-product pool before generation to lock left-right differences, thickness, and wearing scale."
|
||||
return {
|
||||
duration: Number(Math.max(3.2, Math.min(6.5, row.end - row.start || 4.5)).toFixed(1)),
|
||||
first_image: endpointRefs.firstImage ?? null,
|
||||
@@ -1482,11 +1679,11 @@ function buildStoryboardSceneFromAudioRow(
|
||||
subject_images: row.needsSubject ? subjectRefs : [],
|
||||
subject_image: row.needsSubject ? subjectRefs[0] ?? null : null,
|
||||
subject: row.needsSubject
|
||||
? `${subjectDescription}\n主体动作/画面要素:${row.keyElements}\n主体真源:从已生成的相似主体白底视图中按本镜头需求选择 ${subjectRefs.length} 张;${subjectNotes}。关键帧只用于前置主体提取,不作为后续视频首尾帧参考。`
|
||||
: "本条不需要人物主体或相似主体参考;如画面里出现人物,只作为背景或局部,不作为主角。",
|
||||
scene: `镜头类型:${VISUAL_MODE_OPTIONS.find((item) => item.value === row.visualMode)?.label ?? row.visualMode}\n${row.visualPlan}\n首帧规划:${row.firstFramePlan}\n尾帧规划:${row.lastFramePlan}\n原音频依据:${row.source}`,
|
||||
product: `产品需求:${row.needsProduct ? "需要产品参考" : "本条不需要产品"}\n产品出现方式:${row.productPlacement}\n${row.needsProduct ? row.productIntegration : "本条以情绪、人物状态、空间或节奏过渡为主,不露出产品。"}\n${productGuidance}`,
|
||||
action: `${row.skgCopy}\n连续动作:从首帧规划自然过渡到尾帧规划,镜头类型和产品/人物需求不能中途改变。`,
|
||||
? `${subjectDescription}\nSubject action and visual elements: ${row.keyElements}\nSubject source: select ${subjectRefs.length} generated similar-subject view(s) according to this shot's need; ${subjectNotes}. Source keyframes are only used for upstream subject extraction and must not be used as direct endpoint-frame references.`
|
||||
: "No main character or similar-subject reference is needed for this beat. If people appear, they should be background or partial-body context, not the main subject.",
|
||||
scene: `Visual mode: ${row.visualMode}\n${row.visualPlan}\nFirst-frame plan: ${row.firstFramePlan}\nLast-frame plan: ${row.lastFramePlan}\nSource audio reference: ${row.source}`,
|
||||
product: `Product requirement: ${row.needsProduct ? "product reference required" : "no product required for this beat"}\nProduct placement: ${row.productPlacement}\n${row.needsProduct ? row.productIntegration : "This beat focuses on emotion, subject state, space, or pacing transition and should not show the product."}\n${productGuidance}`,
|
||||
action: `${row.skgCopy}\nContinuity action: transition naturally from the first-frame plan to the last-frame plan. The visual mode and product/subject requirements must not change mid-clip.`,
|
||||
reference_ids: [],
|
||||
}
|
||||
}
|
||||
@@ -3103,8 +3300,10 @@ function AudioStoryboardPlanPanel({
|
||||
const [productAnalyzing, setProductAnalyzing] = useState(false)
|
||||
const [productAngleBusy, setProductAngleBusy] = useState<string | null>(null)
|
||||
const [copyOverrides, setCopyOverrides] = useState<Record<number, string>>({})
|
||||
const [copyZhOverrides, setCopyZhOverrides] = useState<Record<number, string>>({})
|
||||
const [planOverrides, setPlanOverrides] = useState<Record<number, RowPlanPatch>>({})
|
||||
const [authorIntent, setAuthorIntent] = useState("")
|
||||
const [showChineseMirror, setShowChineseMirror] = useState(true)
|
||||
const [scriptRewriteBusy, setScriptRewriteBusy] = useState<"all" | number | null>(null)
|
||||
const productFileRef = useRef<HTMLInputElement | null>(null)
|
||||
const productPersistSeq = useRef(0)
|
||||
@@ -3146,6 +3345,7 @@ function AudioStoryboardPlanPanel({
|
||||
}
|
||||
|
||||
const copyForRow = (row: AudioStoryboardRow) => copyOverrides[row.index] ?? row.skgCopy
|
||||
const copyZhForRow = (row: AudioStoryboardRow) => copyZhOverrides[row.index] ?? row.skgCopyZh
|
||||
|
||||
const patchRowCopy = (rowIndex: number, value: string) => {
|
||||
setCopyOverrides((prev) => ({ ...prev, [rowIndex]: value }))
|
||||
@@ -3163,7 +3363,9 @@ function AudioStoryboardPlanPanel({
|
||||
needsProduct: defaults.needsProduct,
|
||||
needsSubject: defaults.needsSubject,
|
||||
subjectDescription: row ? buildSubjectDescription(row.role, mode) : "",
|
||||
subjectDescriptionZh: row ? buildSubjectDescriptionZh(row.role, mode) : "",
|
||||
productPlacement: defaults.productPlacement,
|
||||
productPlacementZh: visualModeDefaults(mode, "zh").productPlacement,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -3328,7 +3530,7 @@ function AudioStoryboardPlanPanel({
|
||||
await analyzeAndCompleteProductViews(productItems.map((item) => item.ref))
|
||||
}
|
||||
|
||||
const applyScriptRewriteItems = (items: Array<{ index: number; text: string }>) => {
|
||||
const applyScriptRewriteItems = (items: Array<{ index: number; text: string; text_zh?: string }>) => {
|
||||
if (!items.length) return
|
||||
setCopyOverrides((prev) => {
|
||||
const next = { ...prev }
|
||||
@@ -3337,6 +3539,13 @@ function AudioStoryboardPlanPanel({
|
||||
}
|
||||
return next
|
||||
})
|
||||
setCopyZhOverrides((prev) => {
|
||||
const next = { ...prev }
|
||||
for (const item of items) {
|
||||
if (item.text_zh?.trim()) next[item.index] = item.text_zh.trim()
|
||||
}
|
||||
return next
|
||||
})
|
||||
}
|
||||
|
||||
const rewriteSingleRow = async (row: AudioStoryboardRow) => {
|
||||
@@ -3426,13 +3635,16 @@ function AudioStoryboardPlanPanel({
|
||||
setEndpointFrameBusy(busyKey)
|
||||
try {
|
||||
await saveRowStoryboardDraft(plannedRow, frame)
|
||||
const rawPrompt = buildEndpointFramePrompt(plannedRow, role, selectedProductItems, subjectBrief)
|
||||
const prompt = await ensureEnglishForModel(rawPrompt)
|
||||
const englishSubjectBrief = await ensureEnglishForModel(subjectBrief)
|
||||
const updated = await generateSceneAsset(job.id, frame.index, {
|
||||
size: SUBJECT_ASSET_SIZE,
|
||||
scene_mode: "similar",
|
||||
scene_style: "premium_product",
|
||||
asset_role: role,
|
||||
prompt: buildEndpointFramePrompt(plannedRow, role, selectedProductItems, subjectBrief),
|
||||
subject_brief: subjectBrief,
|
||||
prompt,
|
||||
subject_brief: englishSubjectBrief,
|
||||
product_images: selectedProductItems.map((item) => item.ref),
|
||||
source_frame_indices: [],
|
||||
})
|
||||
@@ -3622,6 +3834,13 @@ function AudioStoryboardPlanPanel({
|
||||
/>
|
||||
<div className="flex items-center justify-end gap-2">
|
||||
<ModelTrace trace={scriptRewriteModelTrace(runtimeModels)} compact />
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => setShowChineseMirror((value) => !value)}
|
||||
className="inline-flex h-9 items-center justify-center rounded-md border border-white/10 bg-white/[0.045] px-2.5 text-[11px] font-semibold text-white/60 transition hover:border-white/25 hover:text-white"
|
||||
>
|
||||
{showChineseMirror ? "收起中文" : "显示中文"}
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => void rewriteAllRows()}
|
||||
@@ -3633,7 +3852,10 @@ function AudioStoryboardPlanPanel({
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => setCopyOverrides({})}
|
||||
onClick={() => {
|
||||
setCopyOverrides({})
|
||||
setCopyZhOverrides({})
|
||||
}}
|
||||
disabled={scriptRewriteBusy !== null || !Object.keys(copyOverrides).length}
|
||||
className="inline-flex h-9 items-center justify-center rounded-md border border-white/10 bg-white/[0.045] px-2.5 text-[11px] font-semibold text-white/60 transition hover:border-white/25 hover:text-white disabled:cursor-not-allowed disabled:opacity-35"
|
||||
>
|
||||
@@ -3657,6 +3879,7 @@ function AudioStoryboardPlanPanel({
|
||||
const rowVideos = videosForFrame(referenceFrame)
|
||||
const savingStoryboard = storyboardSaveBusyRow === row.index
|
||||
const copyText = copyForRow(row)
|
||||
const copyZhText = copyZhForRow(row)
|
||||
const selectedProductCount = plannedRow.needsProduct ? selectProductItemsForRow(plannedRow, productItems, "endpoint").length : 0
|
||||
const endpointSubjectBrief = plannedRow.needsSubject ? subjectBriefForEndpoint(plannedRow, subjectRefs) : ""
|
||||
return (
|
||||
@@ -3667,12 +3890,15 @@ function AudioStoryboardPlanPanel({
|
||||
<StoryboardPlanCell label="分镜">
|
||||
<div className="font-mono text-[11px] text-white/40">{row.start.toFixed(1)}-{row.end.toFixed(1)}s</div>
|
||||
<div className="mt-1.5 inline-flex max-w-full rounded-md border border-emerald-300/15 bg-emerald-300/[0.08] px-1.5 py-0.5 text-[10px] leading-tight text-emerald-100/80">
|
||||
{row.role}
|
||||
{ROLE_LABELS_ZH[row.role]}
|
||||
</div>
|
||||
</StoryboardPlanCell>
|
||||
|
||||
<StoryboardPlanCell label="原内容">
|
||||
<p className="line-clamp-2 text-[10.5px] leading-snug" title={row.source}>{row.source}</p>
|
||||
{showChineseMirror && row.sourceZh ? (
|
||||
<p className="mt-1 line-clamp-2 text-[10px] leading-snug text-white/34" title={row.sourceZh}>中:{row.sourceZh}</p>
|
||||
) : null}
|
||||
</StoryboardPlanCell>
|
||||
|
||||
<StoryboardPlanCell label={`${scriptStep.no} 新口播文案`}>
|
||||
@@ -3681,6 +3907,9 @@ function AudioStoryboardPlanPanel({
|
||||
onChange={(event) => patchRowCopy(row.index, event.target.value)}
|
||||
className="min-h-[64px] w-full resize-y rounded border border-white/10 bg-black/32 px-2 py-1.5 text-[11px] leading-snug text-white/82 outline-none placeholder:text-white/25 focus:border-cyan-300/50"
|
||||
/>
|
||||
{showChineseMirror && copyZhText ? (
|
||||
<p className="mt-1 line-clamp-2 text-[10px] leading-snug text-white/34" title={copyZhText}>中:{copyZhText}</p>
|
||||
) : null}
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => void rewriteSingleRow(row)}
|
||||
@@ -3730,13 +3959,21 @@ function AudioStoryboardPlanPanel({
|
||||
placeholder="画面规划"
|
||||
className="min-h-[42px] w-full resize-y rounded border border-white/10 bg-black/32 px-2 py-1.5 text-[10.5px] leading-snug text-white/76 outline-none placeholder:text-white/25 focus:border-cyan-300/50"
|
||||
/>
|
||||
{showChineseMirror && plannedRow.visualPlanZh ? (
|
||||
<p className="-mt-1 line-clamp-2 text-[10px] leading-snug text-white/32" title={plannedRow.visualPlanZh}>中:{plannedRow.visualPlanZh}</p>
|
||||
) : null}
|
||||
{plannedRow.needsSubject && (
|
||||
<textarea
|
||||
value={plannedRow.subjectDescription}
|
||||
onChange={(event) => patchRowPlan(row.index, { subjectDescription: event.target.value })}
|
||||
placeholder="人物描述:主体身份、姿态、情绪、需要用哪些视角"
|
||||
className="min-h-[42px] w-full resize-y rounded border border-violet-300/12 bg-violet-300/[0.04] px-2 py-1.5 text-[10.5px] leading-snug text-violet-50/78 outline-none placeholder:text-white/25 focus:border-violet-300/50"
|
||||
/>
|
||||
<>
|
||||
<textarea
|
||||
value={plannedRow.subjectDescription}
|
||||
onChange={(event) => patchRowPlan(row.index, { subjectDescription: event.target.value })}
|
||||
placeholder="人物描述:主体身份、姿态、情绪、需要用哪些视角"
|
||||
className="min-h-[42px] w-full resize-y rounded border border-violet-300/12 bg-violet-300/[0.04] px-2 py-1.5 text-[10.5px] leading-snug text-violet-50/78 outline-none placeholder:text-white/25 focus:border-violet-300/50"
|
||||
/>
|
||||
{showChineseMirror && plannedRow.subjectDescriptionZh ? (
|
||||
<p className="-mt-1 line-clamp-2 text-[10px] leading-snug text-violet-100/34" title={plannedRow.subjectDescriptionZh}>中:{plannedRow.subjectDescriptionZh}</p>
|
||||
) : null}
|
||||
</>
|
||||
)}
|
||||
<div className="grid gap-1 md:grid-cols-2">
|
||||
<textarea
|
||||
@@ -3752,12 +3989,21 @@ function AudioStoryboardPlanPanel({
|
||||
className="min-h-[48px] w-full resize-y rounded border border-cyan-300/12 bg-cyan-300/[0.04] px-2 py-1.5 text-[10.5px] leading-snug text-cyan-50/78 outline-none placeholder:text-white/25 focus:border-cyan-300/50"
|
||||
/>
|
||||
</div>
|
||||
{showChineseMirror && (plannedRow.firstFramePlanZh || plannedRow.lastFramePlanZh) ? (
|
||||
<div className="-mt-1 grid gap-1 md:grid-cols-2">
|
||||
<p className="line-clamp-2 text-[10px] leading-snug text-emerald-100/34" title={plannedRow.firstFramePlanZh}>中:{plannedRow.firstFramePlanZh}</p>
|
||||
<p className="line-clamp-2 text-[10px] leading-snug text-cyan-100/34" title={plannedRow.lastFramePlanZh}>中:{plannedRow.lastFramePlanZh}</p>
|
||||
</div>
|
||||
) : null}
|
||||
<textarea
|
||||
value={plannedRow.productPlacement}
|
||||
onChange={(event) => patchRowPlan(row.index, { productPlacement: event.target.value })}
|
||||
placeholder="产品出现方式:不出现 / 首帧出现 / 尾帧出现 / 全程佩戴 / 产品特写"
|
||||
className="min-h-[38px] w-full resize-y rounded border border-white/10 bg-black/32 px-2 py-1.5 text-[10.5px] leading-snug text-white/68 outline-none placeholder:text-white/25 focus:border-rose-300/45"
|
||||
/>
|
||||
{showChineseMirror && plannedRow.productPlacementZh ? (
|
||||
<p className="-mt-1 line-clamp-2 text-[10px] leading-snug text-white/32" title={plannedRow.productPlacementZh}>中:{plannedRow.productPlacementZh}</p>
|
||||
) : null}
|
||||
<div className="grid gap-1.5 md:grid-cols-[minmax(0,1fr)_88px_88px]">
|
||||
<div className="rounded border border-white/10 bg-black/24 px-2 py-1.5 text-[10px] leading-snug text-white/42">
|
||||
<div className="mb-1 flex items-center justify-between gap-2">
|
||||
@@ -3802,6 +4048,8 @@ function AudioStoryboardPlanPanel({
|
||||
onClick={() => patchRowPlan(row.index, {
|
||||
...visualModeDefaults(plannedRow.visualMode),
|
||||
subjectDescription: buildSubjectDescription(plannedRow.role, plannedRow.visualMode),
|
||||
subjectDescriptionZh: buildSubjectDescriptionZh(plannedRow.role, plannedRow.visualMode),
|
||||
productPlacementZh: visualModeDefaults(plannedRow.visualMode, "zh").productPlacement,
|
||||
})}
|
||||
className="rounded border border-white/10 px-1.5 py-0.5 text-white/42 transition hover:border-white/25 hover:text-white/72"
|
||||
>
|
||||
|
||||
@@ -59,6 +59,7 @@ export interface KeyElement {
|
||||
subject_kind?: SubjectKind
|
||||
subject_assets?: SubjectAsset[]
|
||||
subject_consensus_brief?: string
|
||||
subject_consensus_brief_zh?: string
|
||||
created_at?: number
|
||||
}
|
||||
|
||||
@@ -261,7 +262,7 @@ export async function rewriteStoryboardScript(
|
||||
author_intent?: string
|
||||
segments: StoryboardScriptRewriteSegment[]
|
||||
},
|
||||
): Promise<{ items: Array<{ index: number; text: string }> }> {
|
||||
): Promise<{ items: Array<{ index: number; text: string; text_zh?: string }> }> {
|
||||
const res = await fetch(`${API_BASE}/jobs/${jobId}/script/rewrite`, {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
@@ -573,6 +574,7 @@ export interface CharacterLibraryItem {
|
||||
folder: string
|
||||
description: string
|
||||
prompt_brief?: string
|
||||
prompt_brief_zh?: string
|
||||
primary_image: string
|
||||
images: CharacterLibraryImage[]
|
||||
}
|
||||
@@ -599,6 +601,7 @@ export interface SubjectTemplateItem {
|
||||
description: string
|
||||
note: string
|
||||
prompt_brief?: string
|
||||
prompt_brief_zh?: string
|
||||
source: "database"
|
||||
source_job_id: string
|
||||
source_frame_idx: number
|
||||
@@ -623,6 +626,7 @@ export interface AudioScript {
|
||||
source_text: string
|
||||
source_zh: string
|
||||
rewritten_text: string
|
||||
rewritten_text_zh?: string
|
||||
speaker_profile: string
|
||||
rhythm_profile: string
|
||||
background_audio_profile: string
|
||||
|
||||
Reference in New Issue
Block a user