diff --git a/api/main.py b/api/main.py index ff1f1ab..45dad7a 100644 --- a/api/main.py +++ b/api/main.py @@ -91,7 +91,7 @@ YTDLP_COOKIES_FILE = os.getenv("YTDLP_COOKIES_FILE", "").strip() YTDLP_COOKIES_FROM_BROWSER = os.getenv("YTDLP_COOKIES_FROM_BROWSER", "").strip() AUDIO_PRODUCT_BRIEF = os.getenv( "AUDIO_PRODUCT_BRIEF", - "SKG 智能按摩产品,主打日常肩颈、腰背、眼部、膝盖或足部放松;广告表达要高级、干净、可信,不做医疗疗效承诺。", + "SKG smart massage products for everyday neck-and-shoulder, back, eye, knee, or foot relaxation. Ads should feel premium, clean, trustworthy, and must not make medical efficacy claims.", ).strip() AUDIO_REWRITE_MODEL = gpt_model_env("AUDIO_REWRITE_MODEL", REWRITE_MODEL) VOICE_PROVIDER = "azure_openai" @@ -454,6 +454,7 @@ class CharacterLibraryItem(BaseModel): folder: str = "" description: str = "" prompt_brief: str = "" + prompt_brief_zh: str = "" primary_image: str = "" images: list[CharacterLibraryImage] = Field(default_factory=list) @@ -480,6 +481,7 @@ class SubjectTemplateItem(BaseModel): description: str = "" note: str = "" prompt_brief: str = "" + prompt_brief_zh: str = "" source: Literal["database"] = "database" source_job_id: str = "" source_frame_idx: int = -1 @@ -534,6 +536,7 @@ class KeyElement(BaseModel): subject_kind: SubjectKind = "object" subject_assets: list[SubjectAsset] = Field(default_factory=list) subject_consensus_brief: str = "" + subject_consensus_brief_zh: str = "" created_at: float = 0.0 @@ -565,6 +568,7 @@ class AudioScript(BaseModel): source_text: str = "" source_zh: str = "" rewritten_text: str = "" + rewritten_text_zh: str = "" speaker_profile: str = "" rhythm_profile: str = "" background_audio_profile: str = "" @@ -2307,7 +2311,7 @@ def _audio_profile_model_sync(wav: Path, segments: list[TranscriptSegment], targ fallback = _fallback_audio_profile(segments, target_seconds) if not LLM_API_KEY or not wav.exists(): return fallback - transcript = _transcript_join(segments, "en") or _transcript_join(segments, "zh") or "No reliable transcript." 
+ transcript = _ensure_english(_transcript_join(segments, "en") or _transcript_join(segments, "zh") or "No reliable transcript.") try: audio_b64 = base64.b64encode(wav.read_bytes()).decode("ascii") except Exception: @@ -2373,12 +2377,15 @@ def _build_audio_intake_sync(job_id: str, wav: Path, segments: list[TranscriptSe ) -def _rewrite_audio_script_sync(segments: list[TranscriptSegment], target_seconds: float = 12.0) -> tuple[str, str]: +def _rewrite_audio_script_sync(segments: list[TranscriptSegment], target_seconds: float = 12.0) -> tuple[str, str, str]: fallback = _fallback_audio_script(segments, target_seconds) + try: + fallback_zh = _translate_text_sync(fallback, "zh", max_tokens=300) if LLM_API_KEY else "" + except Exception: + fallback_zh = "" if not LLM_API_KEY: - return fallback, "LLM_API_KEY 未配置,使用本地 SKG 模板" + return fallback, fallback_zh, "LLM_API_KEY 未配置,使用本地 SKG 模板" source_text = _transcript_join(segments, "en") - source_zh = _transcript_join(segments, "zh") min_words, max_words = _voiceover_target_words(target_seconds) prompt = ( "You are an English short-video voice-over writer for SKG wellness massagers. " @@ -2392,10 +2399,9 @@ def _rewrite_audio_script_sync(segments: list[TranscriptSegment], target_seconds "5. Introduce SKG products directly: smart massage, warmth, rhythm, daily neck/back/eye/knee/foot relaxation.\n" "6. Keep it easy for TTS: short sentences, spoken phrasing, no hashtags, no stage directions, no quotation marks.\n" "7. 
If the source transcript is thin, ignore it and write a general SKG product intro.\n" - 'Return strict JSON only: {"rewritten_text":"..."}.\n\n' - f"SKG product context: {AUDIO_PRODUCT_BRIEF}\n\n" - f"English transcript:\n{source_text or 'None'}\n\n" - f"Chinese translation for reference:\n{source_zh or 'None'}" + 'Return strict JSON only: {"rewritten_text":"English VO","rewritten_text_zh":"Simplified Chinese mirror for team review"}.\n\n' + f"SKG product context: {_ensure_english(AUDIO_PRODUCT_BRIEF)}\n\n" + f"English transcript:\n{source_text or 'None'}" ) try: resp = llm().chat.completions.create( @@ -2415,9 +2421,12 @@ def _rewrite_audio_script_sync(segments: list[TranscriptSegment], target_seconds raw = match.group(0) if match else raw data = json.loads(raw) text = str(data.get("rewritten_text", "")).strip() - return (text or fallback), "" + text_zh = str(data.get("rewritten_text_zh", "")).strip() + if text and not text_zh: + text_zh = _translate_text_sync(text, "zh", max_tokens=300) + return (text or fallback), (text_zh or fallback_zh), "" except Exception as e: - return fallback, f"改写失败,使用本地模板:{e}" + return fallback, fallback_zh, f"改写失败,使用本地模板:{e}" def _choose_azure_voice_id() -> str: @@ -2521,7 +2530,7 @@ def _build_audio_script_sync(job_id: str, segments: list[TranscriptSegment], tar source_text = _transcript_join(segments, "en") source_zh = _transcript_join(segments, "zh") duration = max(float(target_seconds or 0), _segment_duration(segments), 4.0) - rewritten, rewrite_error = _rewrite_audio_script_sync(segments, duration) + rewritten, rewritten_zh, rewrite_error = _rewrite_audio_script_sync(segments, duration) selected_voice_id = _choose_tts_voice_id() speaker_profile, rhythm_profile = _audio_delivery_profile(segments, duration, selected_voice_id) voice_url = "" @@ -2539,6 +2548,7 @@ def _build_audio_script_sync(job_id: str, segments: list[TranscriptSegment], tar source_text=source_text, source_zh=source_zh, rewritten_text=rewritten, + 
rewritten_text_zh=rewritten_zh, speaker_profile=speaker_profile, rhythm_profile=rhythm_profile, product_brief=AUDIO_PRODUCT_BRIEF, @@ -3055,6 +3065,55 @@ class RewriteStoryboardScriptReq(BaseModel): segments: list[ScriptRewriteSegmentReq] = Field(default_factory=list) +_TRANSLATION_CACHE: dict[str, str] = {} + + +def _contains_cjk(text: str) -> bool: + return bool(re.search(r"[\u3400-\u9fff]", text or "")) + + +def _translate_text_sync(text: str, target: Literal["en", "zh"] = "en", *, max_tokens: int = 700) -> str: + text = (text or "").strip() + if not text or not LLM_API_KEY: + return text + target_label = "English" if target == "en" else "Simplified Chinese" + prompt = ( + f"Translate the following TikTok ad planning text into concise natural {target_label}. " + "Preserve concrete product, camera, subject, timing, and structure details. " + "Do not add commentary, markdown, quotes, or explanations.\n\n" + f"Input:\n{text}" + ) + resp = llm().chat.completions.create( + model=TRANSLATE_MODEL, + messages=[{"role": "user", "content": prompt}], + temperature=0.15, + max_tokens=max_tokens, + ) + out = (resp.choices[0].message.content or "").strip() + if not out: + rc = getattr(resp.choices[0].message, "reasoning_content", "") or "" + if rc: + out = rc.strip().splitlines()[-1].strip() + return re.sub(r'^[\'"「『]+|[\'"」』]+$', "", out).strip() or text + + +def _ensure_english(text: str) -> str: + text = (text or "").strip() + if not text or not _contains_cjk(text): + return text + key = hashlib.sha256(("en\0" + text).encode("utf-8")).hexdigest() + cached = _TRANSLATION_CACHE.get(key) + if cached: + return cached + try: + translated = _translate_text_sync(text, "en", max_tokens=max(700, min(3500, len(text) // 2 + 900))) + _TRANSLATION_CACHE[key] = translated + return translated + except Exception as e: + print(f"[ensure english fallback] {e}", flush=True) + return text + + @app.post("/translate") def translate_text(req: TranslateReq) -> dict: """单条文本翻译(给生图自定义提取元素 zh→en 
用)""" @@ -3092,22 +3151,26 @@ def translate_text(req: TranslateReq) -> dict: def _fallback_script_rewrite_item(segment: ScriptRewriteSegmentReq, author_intent: str = "") -> dict: source = (segment.source or "").strip() - intent = (author_intent or "").strip() + intent = _ensure_english(author_intent or "") role = segment.role or "" templates = { - "开场钩子": "你有没有发现,低头久了以后,脖子和肩膀会先替你喊累。", - "痛点推进": "刷手机、坐电脑、赶通勤叠在一起,肩颈很容易一直绷着放不下来。", - "利益证明": "SKG 这种挂脖按摩仪,重点就是贴住肩颈位置,把热敷感和揉按感带到真正紧的地方。", - "方案过渡": "这一段可以直接拍拿起、戴上、贴合,让产品自然进入日常放松场景。", - "转化收口": "如果你也想把肩颈放松变成每天的小习惯,可以从这台 SKG 开始。", - "节奏承接": "顺着原片节奏,把这一句落到一个具体的肩颈使用场景里。", + "hook": "Have you noticed that after hours of looking down, your neck and shoulders complain before you do?", + "pain": "Phone scrolling, desk work, and commuting can keep your neck and shoulders tight all day.", + "proof": "An SKG wearable massager sits around the neck and shoulders, bringing warm, rhythmic comfort to the spots that feel tense.", + "solution": "This beat can simply show pick up, wear, fit, and relax, so the product enters a normal daily routine.", + "cta": "If you want neck-and-shoulder relaxation to become a daily habit, start with this SKG massager.", + "bridge": "Follow the source rhythm, but land this line in one specific neck-and-shoulder use moment.", } - rewritten = templates.get(role, templates["节奏承接"]) - if source and role not in {"开场钩子", "转化收口"}: - rewritten = f"{rewritten} 原片这一句的节奏可以保留,但内容换成 SKG 的佩戴和放松体验。" + rewritten = templates.get(role, templates["bridge"]) + if source and role not in {"hook", "cta"}: + rewritten = f"{rewritten} Keep the source sentence rhythm, but replace the content with SKG wearing and relaxation experience." if intent: - rewritten = f"{rewritten} 语气按作者想法处理:{intent[:44]}。" - return {"index": segment.index, "text": rewritten[:220]} + rewritten = f"{rewritten} Adjust the tone based on the creator note: {intent[:90]}." 
+ try: + zh = _translate_text_sync(rewritten, "zh", max_tokens=260) if LLM_API_KEY else "" + except Exception: + zh = "" + return {"index": segment.index, "text": rewritten[:260], "text_zh": zh} def _parse_script_rewrite_items(raw: str, requested: list[ScriptRewriteSegmentReq], author_intent: str = "") -> list[dict]: @@ -3123,7 +3186,7 @@ def _parse_script_rewrite_items(raw: str, requested: list[ScriptRewriteSegmentRe raw_items = data.get("items") if isinstance(data, dict) else data if not isinstance(raw_items, list): raw_items = [] - by_index: dict[int, str] = {} + by_index: dict[int, tuple[str, str]] = {} for item in raw_items: if not isinstance(item, dict): continue @@ -3132,19 +3195,27 @@ def _parse_script_rewrite_items(raw: str, requested: list[ScriptRewriteSegmentRe except Exception: continue value = str(item.get("text") or item.get("rewritten_text") or "").strip() + value_zh = str(item.get("text_zh") or item.get("rewritten_text_zh") or "").strip() if value: - by_index[idx] = re.sub(r"\s+", " ", value).strip()[:260] - return [ - {"index": segment.index, "text": by_index.get(segment.index) or _fallback_script_rewrite_item(segment, author_intent)["text"]} - for segment in requested - ] + by_index[idx] = (re.sub(r"\s+", " ", value).strip()[:260], re.sub(r"\s+", " ", value_zh).strip()[:260]) + items = [] + for segment in requested: + fallback = _fallback_script_rewrite_item(segment, author_intent) + text, text_zh = by_index.get(segment.index, ("", "")) + if text and not text_zh: + try: + text_zh = _translate_text_sync(text, "zh", max_tokens=260) if LLM_API_KEY else "" + except Exception: + text_zh = "" + items.append({"index": segment.index, "text": text or fallback["text"], "text_zh": text_zh or fallback.get("text_zh", "")}) + return items def _rewrite_storyboard_script_sync(req: RewriteStoryboardScriptReq) -> list[dict]: segments = [segment for segment in req.segments if (segment.source or segment.current_text).strip()] if not segments: return [] - 
author_intent = (req.author_intent or "").strip() + author_intent = _ensure_english(req.author_intent or "") if not LLM_API_KEY: return [_fallback_script_rewrite_item(segment, author_intent) for segment in segments] payload = [ @@ -3152,26 +3223,27 @@ def _rewrite_storyboard_script_sync(req: RewriteStoryboardScriptReq) -> list[dic "index": segment.index, "time": f"{segment.start:.1f}-{segment.end:.1f}s", "role": segment.role, - "source_reference": segment.source, - "current_voiceover": segment.current_text, + "source_reference": _ensure_english(segment.source), + "current_voiceover": _ensure_english(segment.current_text), } for segment in segments ] prompt = ( - "你是信息流广告脚本文案改写师。任务:基于原参考文案的节奏和信息结构,把每段改写成 SKG 挂脖肩颈按摩仪的新口播文案。\n" - "硬规则:\n" - "1. 输出中文短视频口播,不要英文,不要舞台说明,不要引号。\n" - "2. 不逐字翻译原文,不保留原品牌、价格、优惠码、平台话术;只参考节奏、钩子、痛点、转化结构。\n" - "3. 产品固定为套在脖子上的 U 形肩颈按摩仪,表达肩颈紧绷、久坐低头、热敷感、揉按感、佩戴放松和日常使用场景。\n" - "4. 避免医疗疗效、治疗、治愈、止痛等强功效承诺。\n" - "5. 每段尽量短,适配该段时间;保持自然创作者口吻。\n" - "6. mode=all 时,整片要前后连贯;mode=segment 时,只改给定段落但仍要贴合上下文风格。\n" - f"作者想法:{author_intent or '没有额外想法,按原片节奏改成自然卖点口播。'}\n" - f"改写模式:{req.mode}\n" - f"SKG 产品背景:{AUDIO_PRODUCT_BRIEF}\n\n" - "输入段落 JSON:\n" + "You are an information-feed ad voice-over rewrite specialist. Rewrite each segment into a new ENGLISH SKG neck-and-shoulder massager voice-over line while preserving the source rhythm and information structure.\n" + "Hard rules:\n" + "1. The main text field must be English short-video VO. No stage directions, no quotes.\n" + "2. Do not translate word-for-word. Do not keep the original brand, price, discount code, platform CTA, or exact claims; only reuse rhythm, hook, pain-point, proof, and conversion structure.\n" + "3. The product is a U-shaped neck-and-shoulder wearable massager worn around the neck. Express neck/shoulder tension, desk posture, looking down, warmth, kneading-like comfort, wearing, relaxation, and daily use.\n" + "4. Avoid medical treatment, cure, pain elimination, clinical, or disease claims.\n" + "5. 
Keep each segment short enough for its time range and natural for a creator voice.\n" + "6. If mode=all, make the whole piece coherent; if mode=segment, rewrite only the given segment while matching the broader style.\n" + "7. Also return a Simplified Chinese mirror for team review in text_zh; it is not for model prompts.\n" + f"Creator note: {author_intent or 'No extra note; follow the source pacing and turn it into natural SKG product VO.'}\n" + f"Rewrite mode: {req.mode}\n" + f"SKG product context: {_ensure_english(AUDIO_PRODUCT_BRIEF)}\n\n" + "Input segments JSON:\n" + json.dumps(payload, ensure_ascii=False) - + '\n\n只输出严格 JSON:{"items":[{"index":0,"text":"改写后的中文口播"}]}' + + '\n\nReturn strict JSON only: {"items":[{"index":0,"text":"rewritten English VO","text_zh":"中文镜像"}]}' ) models = [] for model in [AUDIO_REWRITE_MODEL, ASR_FALLBACK_MODEL, TRANSLATE_MODEL]: @@ -3182,7 +3254,7 @@ def _rewrite_storyboard_script_sync(req: RewriteStoryboardScriptReq) -> list[dic resp = llm().chat.completions.create( model=model, messages=[ - {"role": "system", "content": "只返回合法 JSON,不要 markdown,不要解释。"}, + {"role": "system", "content": "Return valid JSON only. No markdown. 
No explanation."}, {"role": "user", "content": prompt}, ], response_format={"type": "json_object"}, @@ -3950,6 +4022,7 @@ class UpdateElementReq(BaseModel): name_en: str | None = None position: str | None = None subject_consensus_brief: str | None = None + subject_consensus_brief_zh: str | None = None class GenerateSceneAssetReq(BaseModel): @@ -3998,8 +4071,8 @@ class GenerateSubjectAssetsReq(BaseModel): def _subject_profile_prompt_clause(profile: SubjectProfilePreference | None) -> str: if not profile: return "" - prompt_summary = (profile.prompt_summary or "").strip() - resolved_summary = (profile.resolved_summary or "").strip() + prompt_summary = _ensure_english(profile.prompt_summary or "") + resolved_summary = _ensure_english(profile.resolved_summary or "") if prompt_summary: body = prompt_summary[:1400] else: @@ -4013,7 +4086,7 @@ def _subject_profile_prompt_clause(profile: SubjectProfilePreference | None) -> ("hair style", profile.hair), ("commercial mood", profile.mood), ] - body = "; ".join(f"{name}: {value.strip()}" for name, value in parts if value and value.strip())[:1400] + body = "; ".join(f"{name}: {_ensure_english(value.strip())}" for name, value in parts if value and value.strip())[:1400] if not body and not resolved_summary: return "" mode = "random-composed" if profile.mode == "random" else "manually selected" @@ -4125,7 +4198,9 @@ def update_element(job_id: str, idx: int, element_id: str, req: UpdateElementReq if req.position is not None: e.position = req.position.strip() if req.subject_consensus_brief is not None: - e.subject_consensus_brief = req.subject_consensus_brief.strip()[:2200] + e.subject_consensus_brief = _ensure_english(req.subject_consensus_brief.strip())[:2200] + if req.subject_consensus_brief_zh is not None: + e.subject_consensus_brief_zh = req.subject_consensus_brief_zh.strip()[:2200] new_frames.append(f) if not found: raise HTTPException(404, "element not found") @@ -4208,7 +4283,7 @@ def generate_scene_asset(job_id: str, idx: 
int, req: GenerateSceneAssetReq) -> J if confirmed_subjects else "Remove the main foreground subject from the frame if present. " ) - subject_brief = req.subject_brief.strip() + subject_brief = _ensure_english(req.subject_brief.strip()) subject_brief_clause = ( f"Subject identity (text only, no image reference): {subject_brief[:1800]}. " "Maintain this identity across this and other endpoint frames in the same storyboard. " @@ -4237,7 +4312,7 @@ def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> J "warm_lifestyle": "Use a warm lifestyle style: realistic lived-in details, soft natural light, approachable atmosphere.", "cinematic": "Use a cinematic style: dramatic but natural lighting, richer depth, filmic contrast, not fantasy.", }[req.scene_style] - user_prompt = req.prompt.strip() + user_prompt = _ensure_english(req.prompt.strip()) user_prompt_clause = ( "User scene direction: " + user_prompt[:1200] + " " if user_prompt @@ -4483,6 +4558,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat brief = template.prompt_brief.strip() or template.note.strip() or template.description.strip() if similar_mode and not brief: brief = _describe_subject_template_from_images(template.name, template.subject_style, template_paths, template.note) + brief = _ensure_english(brief) selected_template_brief = brief.strip() template_brief_clause = ( f"Reference character brief from saved database template '{template.name}': {brief}. " @@ -4496,6 +4572,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat character_label = character.name character_reference_paths.extend(character_library_file(image.filename) for image in character.images[:7]) brief = character.prompt_brief.strip() or character.description.strip() + brief = _ensure_english(brief) selected_template_brief = brief.strip() template_brief_clause = ( f"Reference character brief from built-in creative character '{character.name}': {brief}. 
" @@ -4558,7 +4635,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat if req.reconstruction_mode == "similar" else "Preserve identity, proportions, silhouette, material, colors, styling, and distinctive details across all generated views. " ) - prompt_extra = req.prompt.strip() + prompt_extra = _ensure_english(req.prompt.strip()) prompt_extra_clause = f"User direction: {prompt_extra[:1200]} " if prompt_extra else "" subject_profile_clause = _subject_profile_prompt_clause(req.subject_profile) identity_lock_clause = ( @@ -4709,7 +4786,7 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat ] fallback_brief = " ".join(part.strip() for part in fallback_parts if part and part.strip())[:1800] if selected_template_brief: - e.subject_consensus_brief = selected_template_brief[:1800] + e.subject_consensus_brief = _ensure_english(selected_template_brief)[:1800] else: asset_paths = [ job_dir(job_id) / "assets" / f"{asset.id}.jpg" @@ -4722,9 +4799,14 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat asset_paths, fallback_brief, ) - e.subject_consensus_brief = brief or current_brief or fallback_brief or ( + e.subject_consensus_brief = _ensure_english(brief or current_brief or fallback_brief or ( "Generated SKG ad subject; identity brief unavailable. Keep one consistent commercial subject with clear neck and shoulder placement area." 
- ) + ))[:1800] + if e.subject_consensus_brief and not e.subject_consensus_brief_zh: + try: + e.subject_consensus_brief_zh = _translate_text_sync(e.subject_consensus_brief, "zh", max_tokens=500)[:1800] + except Exception: + e.subject_consensus_brief_zh = "" new_frames.append(f) if generation_errors: msg = f"主体资产包部分生成完成 · {el.name_zh} · {len(generated)} 张,失败 {len(generation_errors)} 张" @@ -5296,18 +5378,23 @@ def save_subject_template(job_id: str, req: SaveSubjectTemplateReq) -> SubjectTe raise HTTPException(404, "subject asset files missing") primary = next((image.id for image in images if image.view == "front"), images[0].id) - prompt_brief = _describe_subject_template_from_images( + prompt_brief = _ensure_english(_describe_subject_template_from_images( name, req.subject_style, saved_image_paths, req.note.strip(), - ) or req.note.strip() + ) or req.note.strip()) + try: + prompt_brief_zh = _translate_text_sync(prompt_brief, "zh", max_tokens=500) if prompt_brief else "" + except Exception: + prompt_brief_zh = "" item = SubjectTemplateItem( id=template_id, name=name, description=req.note.strip(), note=req.note.strip(), prompt_brief=prompt_brief, + prompt_brief_zh=prompt_brief_zh, source_job_id=job_id, source_frame_idx=frame.index, source_element_id=element.id, diff --git a/docs/source-analysis.html b/docs/source-analysis.html index 8fb1e54..c7456af 100644 --- a/docs/source-analysis.html +++ b/docs/source-analysis.html @@ -663,7 +663,7 @@ api/main.py
你看到的区域信息流复刻分镜工作台
-
主要源码AudioStoryboardPlanPanelProductReferenceCardMissingProductViewSlotbuildAudioStoryboardRowsselectProductItemsForRowsubjectAssetRefsForPlanningsubjectBriefForEndpointendpointAssetRefbuildEndpointFramePromptbuildStoryboardSceneFromAudioRowgenerateEndpointFrameForRowsaveRowStoryboardDraftsaveAllStoryboardDraftsEndpointFrameSlotStoryboardVideoSlots in web/components/ad-recreation-board.tsx;产品图、首尾帧和视频候选缩略图统一复用 MediaAssetTile,包括顶层 hover 放大和删除入口。产品白底图上传复用 uploadStoryboardAsset,视角自动识别调用 analyzeProductViews,缺角度自动补图调用 generateProductAngleAsset。当前单条/批量按钮只保存规划;首尾帧按钮调用 generateSceneAsset,传 subject_brief 和端点选择后的 1-2 张 product_images,不再传主体图或 contact sheet,再用 PUT /frames/{idx}/storyboard 保存 asset 首尾帧引用;首尾帧删除只移除本条规划中的引用,避免继续误用旧资产。web/app/page.tsx 的视频提交回调有暂停保护,旧入口误触也不会请求 /storyboard/video
+
主要源码AudioStoryboardPlanPanelProductReferenceCardMissingProductViewSlotbuildAudioStoryboardRowsselectProductItemsForRowsubjectAssetRefsForPlanningsubjectBriefForEndpointendpointAssetRefbuildEndpointFramePromptbuildStoryboardSceneFromAudioRowgenerateEndpointFrameForRowsaveRowStoryboardDraftsaveAllStoryboardDraftsEndpointFrameSlotStoryboardVideoSlots in web/components/ad-recreation-board.tsx;产品图、首尾帧和视频候选缩略图统一复用 MediaAssetTile,包括顶层 hover 放大和删除入口。产品白底图上传复用 uploadStoryboardAsset,视角自动识别调用 analyzeProductViews,缺角度自动补图调用 generateProductAngleAsset。当前单条/批量按钮只保存规划;首尾帧按钮调用 generateSceneAsset,传 subject_brief 和端点选择后的 1-2 张 product_images,不再传主体图或 contact sheet,再用 PUT /frames/{idx}/storyboard 保存 asset 首尾帧引用;首尾帧删除只移除本条规划中的引用,避免继续误用旧资产。语言策略由 AudioStoryboardRow 的英文主字段 + *Zh 镜像字段承载:role 内部是 hook/pain/proof/solution/cta/bridgebuildEndpointFramePromptStoryboardScene 主值默认英文,中文只用于团队阅读;首尾帧提交前前端 translateText 兜底,后端 _ensure_english 再兜底。web/app/page.tsx 的视频提交回调有暂停保护,旧入口误触也不会请求 /storyboard/video
适合怎么描述“按音频逐句生成产品分镜、每行怎样改写口播、哪几句不需要产品或人物、首帧/尾帧该怎么停、首尾帧是否已经生成并准确、产品素材池识别/补图后的备注是否准确、哪些分镜后续才值得进入单条视频候选”。
@@ -737,17 +737,19 @@ api/main.py cutout_id, subject_kind: object | living, subject_assets: SubjectAsset[], - subject_consensus_brief + subject_consensus_brief, + subject_consensus_brief_zh }

AudioScript

-

第一步音频解析的结构化产物。pipeline_transcribe 提取 audio.wav 后先保存原始转写、中文翻译、讲话人画像、口播节奏和背景音乐/环境声/音效分析。rewritten_textvoice_url 等字段仍保留给后续新配音阶段,当前第一步不默认写入。

+

第一步音频解析的结构化产物。pipeline_transcribe 提取 audio.wav 后先保存原始英文转写、中文翻译、讲话人画像、口播节奏和背景音乐/环境声/音效分析。rewritten_text 是英文新口播,rewritten_text_zh 只作为团队审稿镜像;voice_url 等字段仍保留给后续新配音阶段。

AudioScript {
   status: idle | rewriting | completed | failed,
   source_text,
   source_zh,
   rewritten_text,
+  rewritten_text_zh,
   speaker_profile,
   rhythm_profile,
   background_audio_profile,
@@ -777,10 +779,11 @@ SubjectAsset {
   width, height, size,
   source_frame_indices[]
 }
-

SubjectTemplateItem 保存用户确认过的主体视图包。prompt_brief 是后端从模板图反推的文字特征,后续相似生成优先读取它,而不是再次把模板图作为强参考图传给 image-edit。

+

SubjectTemplateItem 保存用户确认过的主体视图包。prompt_brief 是后端从模板图反推的英文文字特征,后续相似生成优先读取它,而不是再次把模板图作为强参考图传给 image-edit;prompt_brief_zh 仅用于模板库卡片和团队阅读。

SubjectTemplateItem {
   id, name, description, note,
   prompt_brief,
+  prompt_brief_zh,
   subject_style: transparent_human | source_actor,
   primary_image,
   images: SubjectTemplateImage[]
@@ -907,7 +910,7 @@ ProductRefStateItem {
             删除输入视频DELETE /jobs/{id}deleteJob从任务队列、URL 和磁盘 jobs/<id> 目录移除整个 job,包括源视频、关键帧、元素提取图和生成视频。
             解析视频POST /jobs/{id}/analyze?frames=&target=&mode=&quality=analyzeJob抽参考帧能力。当前开始流程会在视频下载完成后自动调用一次,默认 frames=12target=motionquality=accuratemode=replace,形成全局动作/节奏参考帧池;原版视频旁的“抽参考 12 帧”也会用同一参数显式重跑。target 仍支持透明骨架人、综合、清晰主体、转场变化、表情瞬间、动作峰值。
             音频文案轨POST /jobs/{id}/transcribetriggerTranscribe若尚未拆轨,先从 source.mp4 提取 audio.wav 并回填 source_audio_url;随后用 ASR 提取原始文案,翻译成中文,写入 audio_script.source_textsource_zh 和逐句 transcript。远端 ASR_MODEL 失败后先走本机 LOCAL_ASR_BIN/LOCAL_ASR_MODEL(默认 mlx_whisper),再尝试 ASR_FALLBACK_MODEL。后端会拒绝重复文本、逐秒假字幕或覆盖率过低的结果,不再把不可听的多模态输出写进时间轴。中文翻译由 TRANSLATE_MODEL 按 ASR 段落补齐,失败时保留原文时间轴且中文可为空。再用 ASR_FALLBACK_MODEL 读取 audio.wav 和已有转写时间轴,多模态音频分析讲话人、语速节奏、停顿、背景音乐/环境声/音效,写入 speaker_profilerhythm_profilebackground_audio_profile;若模型分析失败,则用转写段落、时长和语速做本地估算兜底。当前第一步不默认生成 SKG 新口播和 Azure OpenAI 配音。
-            分镜脚本改写POST /jobs/{id}/script/rewriterewriteStoryboardScript根据原参考文案、当前新口播、分镜角色、时间段和作者想法改写中文口播。mode=segment 只改一段;mode=all 一次改完整片,要求整片前后连贯。后端按 AUDIO_REWRITE_MODELASR_FALLBACK_MODELTRANSLATE_MODEL 依次尝试,全部失败时用本地模板保留可编辑文案。接口只返回 items[index,text],前端暂存在当前页面状态里,点击保存规划后写入 StoryboardScene.action。
+            分镜脚本改写POST /jobs/{id}/script/rewriterewriteStoryboardScript根据原英文参考文案、当前英文新口播、英文 role enum、时间段和作者想法改写英文口播;作者想法、原参考文案或当前口播若含中文,后端会先经 _ensure_english 兜底翻译成英文再写入 prompt。mode=segment 只改一段;mode=all 一次改完整片,要求整片前后连贯。后端按 AUDIO_REWRITE_MODEL → ASR_FALLBACK_MODEL → TRANSLATE_MODEL 依次尝试,全部失败时用英文本地模板保留可编辑文案。接口返回 items[index,text,text_zh],其中 text 是写入模型链路的英文主值,text_zh 只供团队审稿镜像显示(模型未返回中文镜像时后端会补译;未配置 LLM_API_KEY 或补译失败时为空字符串);点击保存规划后写入 StoryboardScene.action。
             原始音频文件GET /jobs/{id}/audio.wavsourceAudioUrl返回拆轨得到的 wav;当前主界面不再渲染底部吸附音频条,右侧复刻工作表会读取该文件生成参考图式横向响度波形,并和原视频、逐句时间轴联动;波形标题栏显示当前播放秒数、总时长和鼠标指针停点秒数。
             改写配音文件GET /jobs/{id}/audio-script.mp3apiAssetUrl(job.audio_script.voice_url)后续新配音阶段保留的 TTS 产物;服务端固定走 VOICE_PROVIDER=azure_openai,通过 AZURE_OPENAI_BASE_URL 的 OpenAI 协议生成 mp3,并按 AZURE_TTS_PATHS 依次尝试 /audio/speech/v1/audio/speech 等路径。当前第一步不默认生成该文件。
             手动加帧POST /jobs/{id}/frames?t=addManualFrame按视频时间戳抽一帧,index 递增但 frames 按 timestamp 排序。当前主界面会把原版视频播放器的播放秒数传给 AudioIntakePanel 标题栏右侧的“当前点抽帧”。
@@ -1035,6 +1038,19 @@ ProductRefStateItem {
         

变更记录

这个记录不是 git log 的替代品。它记录“产品理解发生了什么变化、影响了哪些源码、你以后描述需求时该怎么说”。后续每次改功能都要补一条。

+
+
+

2026-05-18 · 模型 prompt 语言策略切到英文主值

+ Prompt + UI + API +
+
+

问题:最终产物是英文 TikTok 二创广告,但前端默认分镜规划、首尾帧 prompt 和用户可编辑规划字段里混入中文,导致 gpt-image-2、脚本改写和后续视频模型收到中英混杂指令。

+

改动:AudioStoryboardRow.role 改为 hook/pain/proof/solution/cta/bridge 英文枚举,UI 仍显示中文角色标签。buildAudioStoryboardRows、buildVisualPlan、buildFirstFramePlan、buildLastFramePlan、buildSubjectDescription、buildEndpointFramePrompt 和 buildStoryboardSceneFromAudioRow 的模型主字段改为英文,并新增 *Zh 镜像用于团队阅读。首尾帧提交前,前端会对含中文的 prompt 调 translateText 兜底;后端新增 _ensure_english(仅当文本含汉字时才调用翻译,结果按文本哈希缓存在进程内,翻译失败则原样返回原文),并挂到 generate_scene_asset、generate_subject_assets、update_element、save_subject_template、_subject_profile_prompt_clause、脚本改写和音频分析入口。

+

影响:发给 LLM / 生图 / 视频模型的主 prompt 默认全英文;中文只作为团队审稿镜像、UI 标签和 toast。AudioScript 新增 rewritten_text_zhKeyElement 新增 subject_consensus_brief_zhSubjectTemplateItem 新增 prompt_brief_zhPOST /jobs/{id}/script/rewrite 返回 text 英文主值和 text_zh 中文镜像。

+
+

2026-05-18 · 首尾帧改为主体 brief + 产品少量硬参考

diff --git a/web/components/ad-recreation-board.tsx b/web/components/ad-recreation-board.tsx index ce706bb..0b000cd 100644 --- a/web/components/ad-recreation-board.tsx +++ b/web/components/ad-recreation-board.tsx @@ -54,6 +54,7 @@ import { updateElement, updateStoryboard, uploadStoryboardAsset, + translateText, videoUrl, } from "@/lib/api" import { type NodeData } from "@/components/nodes" @@ -83,6 +84,7 @@ const VIDEO_MODELS = [ type VideoModel = (typeof VIDEO_MODELS)[number]["value"] type BoardThemeMode = "dark" | "light" +type AudioStoryboardRole = "hook" | "pain" | "proof" | "solution" | "cta" | "bridge" const BOARD_THEME_STORAGE_KEY = "skg-board-theme" @@ -103,19 +105,28 @@ type AudioStoryboardRow = { start: number end: number source: string - role: string + sourceZh: string + role: AudioStoryboardRole visualMode: StoryboardVisualMode needsProduct: boolean needsSubject: boolean subjectDescription: string + subjectDescriptionZh: string skgCopy: string + skgCopyZh: string visualPlan: string + visualPlanZh: string firstFramePlan: string + firstFramePlanZh: string lastFramePlan: string + lastFramePlanZh: string referencePlan: string keyElements: string + keyElementsZh: string productIntegration: string + productIntegrationZh: string productPlacement: string + productPlacementZh: string } type ProductRefItem = ProductRefStateItem @@ -136,7 +147,7 @@ type ResolvedSubjectProfile = { payload: SubjectProfilePreference } type StoryboardVisualMode = NonNullable -type RowPlanPatch = Partial> +type RowPlanPatch = Partial> type WorkflowStepId = "input" | "source" | "audio" | "visual" | "subject" | "product" | "script" | "scene" | "video" type WorkflowStepStatus = "blocked" | "pending" | "running" | "ready" | "paused" type WorkflowStep = { @@ -324,6 +335,52 @@ const PRODUCT_USE_TAG_LABELS: Record = { material_texture: "材质", } +const ROLE_LABELS_ZH: Record = { + hook: "开场钩子", + pain: "痛点推进", + proof: "利益证明", + solution: "方案过渡", + cta: "转化收口", + bridge: "节奏承接", +} + +const 
ROLE_LABELS_EN: Record = { + hook: "hook", + pain: "pain build", + proof: "benefit proof", + solution: "solution transition", + cta: "conversion close", + bridge: "rhythm bridge", +} + +const PRODUCT_VIEW_PROMPT_LABELS: Record = { + front: "front / outer shell", + left_45: "wearer's left 45-degree view", + right_45: "wearer's right 45-degree view", + side_thickness: "side thickness view", + inner_contacts: "inner neck-contact pads", + back_bottom: "back / bottom structure", +} + +const PRODUCT_BACKGROUND_PROMPT_LABELS: Record = { + white: "white background", + black: "black background", + simple: "simple solid background", + complex: "complex background", + unknown: "unknown background", +} + +const PRODUCT_USE_TAG_PROMPT_LABELS: Record = { + hero_packshot: "hero packshot", + wearing_scale: "wearing scale", + inner_contact: "inner contact pads", + side_thickness: "side thickness", + asymmetry: "left-right asymmetry", + button_detail: "button detail", + back_bottom: "back/bottom structure", + material_texture: "material texture", +} + const controlClass = "h-10 rounded-md border border-white/10 bg-black/55 px-3 text-[12px] text-white outline-none transition focus:border-cyan-300/60 disabled:cursor-not-allowed disabled:opacity-40" @@ -352,6 +409,20 @@ function shortId(id?: string | null) { return id ? id.slice(0, 8) : "-" } +function containsCjk(text: string) { + return /[\u3400-\u9fff]/.test(text) +} + +async function ensureEnglishForModel(text: string) { + const trimmed = text.trim() + if (!trimmed || !containsCjk(trimmed)) return trimmed + try { + return await translateText(trimmed, "en") + } catch { + return trimmed + } +} + function subjectProfileOption(category: SubjectProfileCategory, value: string) { return category.options.find((option) => option.value === value) ?? 
category.options[0] } @@ -373,6 +444,16 @@ function resolveSubjectProfile( const values = { ...DEFAULT_SUBJECT_PROFILE_DRAFT } const labelParts: string[] = [] const promptParts: string[] = [] + const promptLabelByKey: Record = { + gender: "gender presentation", + age: "age range", + wardrobe: "wardrobe style", + region_ethnicity: "regional or ethnic appearance cues", + skin_tone: "skin tone", + body: "body proportion", + hair: "hair style", + mood: "commercial mood", + } for (const category of SUBJECT_PROFILE_CATEGORIES) { const rawValue = draft[category.key] || "random" let option = subjectProfileOption(category, rawValue) @@ -382,7 +463,7 @@ function resolveSubjectProfile( } values[category.key] = option.value labelParts.push(`${category.label}:${option.label}`) - promptParts.push(`${category.label}: ${option.prompt}`) + promptParts.push(`${promptLabelByKey[category.key]}: ${option.prompt}`) } const summary = labelParts.join(" / ") const promptSummary = promptParts.join("; ") @@ -393,14 +474,14 @@ function resolveSubjectProfile( promptSummary, payload: { mode, - gender: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[0], values.gender).label, - age: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[1], values.age).label, - wardrobe: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[2], values.wardrobe).label, - region_ethnicity: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[3], values.region_ethnicity).label, - skin_tone: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[4], values.skin_tone).label, - body: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[5], values.body).label, - hair: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[6], values.hair).label, - mood: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[7], values.mood).label, + gender: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[0], values.gender).prompt, + age: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[1], values.age).prompt, + wardrobe: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[2], 
values.wardrobe).prompt, + region_ethnicity: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[3], values.region_ethnicity).prompt, + skin_tone: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[4], values.skin_tone).prompt, + body: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[5], values.body).prompt, + hair: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[6], values.hair).prompt, + mood: subjectProfileOption(SUBJECT_PROFILE_CATEGORIES[7], values.mood).prompt, resolved_summary: summary, prompt_summary: promptSummary, }, @@ -828,161 +909,254 @@ function buildFallbackScene(job: Job, frame: KeyFrame, order: number): Storyboar const duration = Math.max(3.5, Math.min(7.5, Math.max(job.duration || 0, frames.length * 5) / Math.max(frames.length, 1))) const audio = job.audio_script?.rewritten_text?.trim() || job.transcript?.slice(0, 4).map((item) => item.en || item.zh).filter(Boolean).join(" ") - || "按原音频说话节奏改写为 SKG 产品介绍。" + || "Rewrite the original audio pacing into a new SKG product introduction." const objects = frame.description?.objects?.slice(0, 5).map((item) => item.name).filter(Boolean).join("、") return { duration: Number(duration.toFixed(1)), first_image: null, last_image: null, - subject: objects ? `关键元素候选:${objects}` : "保留原视频最重要的主体动作和构图关系。", - scene: `${frame.description?.scene || `按第 ${order + 1} 段音频规划 SKG 信息流广告分镜。`}\n音频节奏依据:${audio.slice(0, 220)}`, - product: "把原素材里的产品/痛点转成 SKG 颈部/肩颈按摩仪表达,默认使用 SKG 四张产品角度图做产品真源。", + subject: objects ? `Key element candidates: ${objects}` : "Keep the source video's most important subject motion and composition relationship.", + scene: `${frame.description?.scene || `Plan SKG information-feed ad scene ${order + 1} from the audio segment.`}\nAudio pacing reference: ${audio.slice(0, 220)}`, + product: "Convert the source product or pain-point context into SKG neck-and-shoulder massager expression. Use the uploaded SKG product angles as product truth.", action: frame.description?.style - ? 
`沿用原画面的讲话节奏、动作节点和 ${frame.description.style},突出使用前紧绷、使用后放松。` - : "沿用原视频的讲话节奏和动作节点,突出使用前紧绷、使用后放松。", + ? `Keep the source speaking rhythm, action beats, and ${frame.description.style}; show tension before use and relaxed comfort after use.` + : "Keep the source speaking rhythm and action beats; show tension before use and relaxed comfort after use.", reference_ids: [], } } -function classifyAudioRole(text: string, index: number, total: number) { +function classifyAudioRole(text: string, index: number, total: number): AudioStoryboardRole { const lower = text.toLowerCase() - if (index === 0) return "开场钩子" - if (index >= total - 2 || /discount|code|shipping|link|limited|sold out|grab|recommend|tiktok/.test(lower)) return "转化收口" - if (/can't|dont|don't|if |when |tired|stress|pain|crave|bloated|puffy|ready/.test(lower)) return "痛点推进" - if (/help|can |reduce|improve|relax|lower|stabilize|clear|less/.test(lower)) return "利益证明" - if (/use|try|apple|product|bottle|one month/.test(lower)) return "方案过渡" - return "节奏承接" + if (index === 0) return "hook" + if (index >= total - 2 || /discount|code|shipping|link|limited|sold out|grab|recommend|tiktok/.test(lower)) return "cta" + if (/can't|dont|don't|if |when |tired|stress|pain|crave|bloated|puffy|ready/.test(lower)) return "pain" + if (/help|can |reduce|improve|relax|lower|stabilize|clear|less/.test(lower)) return "proof" + if (/use|try|apple|product|bottle|one month/.test(lower)) return "solution" + return "bridge" } -function buildSkgCopy(role: string, index: number) { - const variants: Record = { - "开场钩子": [ +function buildSkgCopy(role: AudioStoryboardRole, index: number) { + const variants: Record = { + hook: [ + "If you spend hours looking down at your phone or working at a desk, your neck and shoulders may already be carrying that tension.", + "A few hours on screens can make your neck and shoulders feel tired faster than you expect.", + ], + pain: [ + "That tight neck, heavy shoulder feeling, and uncomfortable head lift are 
signs you should not wait to deal with it.", + "Commuting, desk work, parenting, and phone scrolling can keep your neck and shoulders tense all day.", + ], + proof: [ + "The SKG neck-and-shoulder massager sits around the back of your neck and shoulders, bringing warmth and kneading-like comfort right where you feel tight.", + "Wear it hands-free between work, at home, or before bed to settle into a calmer relaxation rhythm.", + ], + solution: [ + "This beat turns the source explanation into a clear SKG routine: pick it up, wear it, adjust the fit, and relax.", + "Let the product enter naturally, and show the change from neck tension to a more relaxed state.", + ], + cta: [ + "If you want neck-and-shoulder relaxation to become part of your daily routine, this SKG massager is an easy place to start.", + "Close with a clear product detail and a relaxed expression so viewers know exactly what to try next.", + ], + bridge: [ + "Keep the source video's short, fast rhythm, but anchor each line in a specific neck-and-shoulder moment or product action.", + "Use this line as a bridge from the pain point into the SKG routine without slowing the pace.", + ], + } + const list = variants[role] ?? variants.bridge + return list[index % list.length] +} + +function buildSkgCopyZh(role: AudioStoryboardRole, index: number) { + const variants: Record = { + hook: [ "如果你也经常低头刷手机、久坐办公,肩颈紧绷可能已经在悄悄影响状态。", "每天盯屏几个小时,脖子和肩膀的疲惫会比你想得更早出现。", ], - "痛点推进": [ + pain: [ "脖子发紧、肩膀沉、抬头不舒服,不一定要等到很难受才处理。", "通勤、办公、带娃、刷手机叠在一起,肩颈很容易一直处在紧绷状态。", ], - "利益证明": [ + proof: [ "SKG 颈部按摩仪贴合后颈和肩颈两侧,把热敷感和揉按感带到真正紧的位置。", "戴上后不用占手,工作间隙、居家放松、睡前都能快速进入舒缓节奏。", ], - "方案过渡": [ + solution: [ "这一镜把原片的讲解节奏换成 SKG 使用步骤:拿起、佩戴、贴合、放松。", "让产品自然进入画面,重点不是硬推,而是把肩颈紧绷到放松的变化拍清楚。", ], - "转化收口": [ + cta: [ "如果你也想把肩颈放松变成日常习惯,可以先从这台 SKG 开始。", "最后用清晰产品特写和轻松状态收住,让用户知道现在就可以入手。", ], - "节奏承接": [ + bridge: [ "延续原片短句快节奏,把每一句都落到一个具体肩颈场景或产品动作。", "这一句作为过渡,画面从痛点切到产品,让节奏继续往下走。", ], } - const list = variants[role] ?? 
variants["节奏承接"] + const list = variants[role] ?? variants.bridge return list[index % list.length] } -function buildVisualPlan(role: string) { - if (role === "开场钩子") return "竖屏近景口播开场,人物轻揉脖子或转动肩颈,直接建立疲惫感。" - if (role === "痛点推进") return "沿用原视频的表情、手势和节奏,画面强调低头、久坐、肩颈紧绷。" - if (role === "利益证明") return "产品进入画面并佩戴到后颈,切到肩颈贴合、按键、热敷/揉按感的细节。" - if (role === "转化收口") return "产品清晰特写 + 人物放松表情收尾,保留信息流广告的快速行动感。" +function buildVisualPlan(role: AudioStoryboardRole) { + if (role === "hook") return "Vertical close-up creator opening. The subject gently rubs the neck or rotates the shoulders to establish fatigue immediately." + if (role === "pain") return "Keep the source expression, gesture rhythm, and fast pacing while emphasizing phone posture, desk sitting, and neck-and-shoulder tension." + if (role === "proof") return "Bring the product into frame and place it around the back of the neck, then cut to fit, button, warmth, and kneading-comfort details." + if (role === "cta") return "End with a clean product detail plus a relaxed expression, keeping the quick action feeling of a feed ad." + return "Keep the source-style composition and camera movement, but replace the content with an SKG neck-and-shoulder relaxation scene." +} + +function buildVisualPlanZh(role: AudioStoryboardRole) { + if (role === "hook") return "竖屏近景口播开场,人物轻揉脖子或转动肩颈,直接建立疲惫感。" + if (role === "pain") return "沿用原视频的表情、手势和节奏,画面强调低头、久坐、肩颈紧绷。" + if (role === "proof") return "产品进入画面并佩戴到后颈,切到肩颈贴合、按键、热敷/揉按感的细节。" + if (role === "cta") return "产品清晰特写 + 人物放松表情收尾,保留信息流广告的快速行动感。" return "保持原片同类构图和运镜,把画面内容替换成 SKG 肩颈放松场景。" } -function visualModeDefaults(mode: StoryboardVisualMode) { +function visualModeDefaults(mode: StoryboardVisualMode, language: "en" | "zh" = "en") { if (mode === "person_only") { return { needsProduct: false, needsSubject: true, - productPlacement: "本条不出现产品,只用人物状态、痛点或口播承接节奏;不要硬插 SKG 产品。", + productPlacement: language === "zh" + ? 
"本条不出现产品,只用人物状态、痛点或口播承接节奏;不要硬插 SKG 产品。" + : "Do not show the product in this beat. Use the subject's state, pain point, or voice-over performance to carry the rhythm; do not force in the SKG product.", } } if (mode === "product_only") { return { needsProduct: true, needsSubject: false, - productPlacement: "只展示 SKG 肩颈按摩仪本体、佩戴角度或功能细节;不要强行加入人物。", + productPlacement: language === "zh" + ? "只展示 SKG 肩颈按摩仪本体、佩戴角度或功能细节;不要强行加入人物。" + : "Show only the SKG neck-and-shoulder massager, wearing angle, or functional detail; do not force a main character into this beat.", } } if (mode === "environment") { return { needsProduct: false, needsSubject: false, - productPlacement: "本条作为场景/情绪/节奏过渡,不出现产品和人物主体;只保留空间、光线和运动节奏。", + productPlacement: language === "zh" + ? "本条作为场景/情绪/节奏过渡,不出现产品和人物主体;只保留空间、光线和运动节奏。" + : "Use this beat as a scene, mood, or pacing transition. Do not show the product or main subject; keep only space, light, and motion rhythm.", } } return { needsProduct: true, needsSubject: true, - productPlacement: "SKG 肩颈按摩仪作为外置佩戴产品出现,围绕拿起、佩戴、调整、按键或放松状态展开。", + productPlacement: language === "zh" + ? 
"SKG 肩颈按摩仪作为外置佩戴产品出现,围绕拿起、佩戴、调整、按键或放松状态展开。" + : "Show the SKG neck-and-shoulder massager as an external wearable product, built around picking it up, wearing it, adjusting it, pressing controls, or relaxing with it.", } } -function visualModeForRole(role: string): StoryboardVisualMode { - if (role === "开场钩子" || role === "痛点推进") return "person_only" - if (role === "转化收口") return "product_only" - if (role === "节奏承接") return "environment" +function visualModeForRole(role: AudioStoryboardRole): StoryboardVisualMode { + if (role === "hook" || role === "pain") return "person_only" + if (role === "cta") return "product_only" + if (role === "bridge") return "environment" return "person_product" } -function buildFirstFramePlan(role: string) { - if (role === "开场钩子") return "人物近景看向镜头或低头办公,手轻扶后颈,画面先不露产品。" - if (role === "痛点推进") return "保留原片人物动作节奏,肩颈紧绷、低头、揉脖子或久坐状态明确。" - if (role === "利益证明") return "人物拿起或准备佩戴 SKG 肩颈按摩仪,产品位置清晰但动作刚开始。" - if (role === "方案过渡") return "人物从痛点状态切到拿起产品/靠近肩颈,准备进入使用动作。" - if (role === "转化收口") return "产品干净特写或佩戴完成后的稳定画面,留出转化收口的视觉焦点。" +function buildFirstFramePlan(role: AudioStoryboardRole) { + if (role === "hook") return "Close-up subject looking at camera or working with head down, one hand lightly touching the back of the neck, with no product visible yet." + if (role === "pain") return "Preserve the source action rhythm while making neck tension, looking down, neck rubbing, or desk-sitting posture clear." + if (role === "proof") return "The subject picks up or prepares to wear the SKG neck-and-shoulder massager; product position is clear but the action has just started." + if (role === "solution") return "Move from the pain state into picking up the product or bringing it toward the neck and shoulders, ready to begin use." + if (role === "cta") return "Clean product close-up or stable worn-product frame, leaving a strong visual focus for the conversion close." 
+ return "Start from the current source sentence's composition to carry the rhythm without forcing a subject change." +} + +function buildFirstFramePlanZh(role: AudioStoryboardRole) { + if (role === "hook") return "人物近景看向镜头或低头办公,手轻扶后颈,画面先不露产品。" + if (role === "pain") return "保留原片人物动作节奏,肩颈紧绷、低头、揉脖子或久坐状态明确。" + if (role === "proof") return "人物拿起或准备佩戴 SKG 肩颈按摩仪,产品位置清晰但动作刚开始。" + if (role === "solution") return "人物从痛点状态切到拿起产品/靠近肩颈,准备进入使用动作。" + if (role === "cta") return "产品干净特写或佩戴完成后的稳定画面,留出转化收口的视觉焦点。" return "按原视频当前句的构图启动,先承接节奏,不强行改变镜头主体。" } -function buildLastFramePlan(role: string) { - if (role === "开场钩子") return "人物抬头或表情更集中,给下一镜产品或方案进入留出空间。" - if (role === "痛点推进") return "紧绷状态被放大到一个明确停点,准备切入产品解决方案。" - if (role === "利益证明") return "产品已正确佩戴在后颈/肩颈位置,人物放松,产品比例稳定。" - if (role === "方案过渡") return "产品贴合肩颈,手部调整完成,画面自然进入功能细节或放松状态。" - if (role === "转化收口") return "产品或佩戴状态稳定收住,画面干净,适合后续接购买/行动号召。" +function buildLastFramePlan(role: AudioStoryboardRole) { + if (role === "hook") return "The subject lifts the head or becomes more focused, leaving room for the product or solution to enter in the next beat." + if (role === "pain") return "Amplify the tense state into a clear stopping point, ready to cut into the product solution." + if (role === "proof") return "The product is correctly worn around the back of the neck and shoulders, the subject looks more relaxed, and product scale is stable." + if (role === "solution") return "The product fits against the neck and shoulders, hand adjustment is complete, and the frame can move into functional detail or relaxation." + if (role === "cta") return "Hold a stable product or worn-product frame with clean composition, ready for purchase or action-call continuation." + return "Advance the action slightly and hold a stable endpoint that connects naturally to the next sentence." 
+} + +function buildLastFramePlanZh(role: AudioStoryboardRole) { + if (role === "hook") return "人物抬头或表情更集中,给下一镜产品或方案进入留出空间。" + if (role === "pain") return "紧绷状态被放大到一个明确停点,准备切入产品解决方案。" + if (role === "proof") return "产品已正确佩戴在后颈/肩颈位置,人物放松,产品比例稳定。" + if (role === "solution") return "产品贴合肩颈,手部调整完成,画面自然进入功能细节或放松状态。" + if (role === "cta") return "产品或佩戴状态稳定收住,画面干净,适合后续接购买/行动号召。" return "动作小幅推进并稳定停住,保留与下一句衔接的方向感。" } -function buildSubjectDescription(role: string, visualMode: StoryboardVisualMode) { +function buildSubjectDescription(role: AudioStoryboardRole, visualMode: StoryboardVisualMode) { + if (visualMode === "product_only" || visualMode === "environment") return "" + const base = "Consistent similar subject: a friendly transparent or semi-transparent humanoid with visible clean white skeleton inside, commercial not horror, with neck, collarbone, and upper-back areas clear for wearing a neck-and-shoulder massager." + if (role === "hook") return `${base} Front or upper-body creator speaking state, with a pain-point or curious expression that grabs attention quickly.` + if (role === "pain") return `${base} Neck-and-shoulder tension, looking down, desk posture, or rubbing the neck; make the neck line, shoulders, and upper back readable.` + if (role === "proof") return `${base} Relaxed state while wearing or about to wear the product, prioritizing neck-and-shoulder close-up, side, and back-neck angles.` + if (role === "solution") return `${base} Hands adjust the product or show wearable fit naturally; product placement must not hide important anatomy or device structure.` + if (role === "cta") return `${base} Stable, relaxed, clean ending state using front, three-quarter, or stable worn-product framing.` + return `${base} Keep one consistent subject identity, material, body type, gender presentation, and commercial mood across the whole video.` +} + +function buildSubjectDescriptionZh(role: AudioStoryboardRole, visualMode: StoryboardVisualMode) { if (visualMode === 
"product_only" || visualMode === "environment") return "" const base = "统一相似主体:透明或半透明皮肤包裹可见白色骨架的人形,广告感、非恐怖、肩颈/锁骨/上背区域清晰,适合佩戴肩颈按摩仪。" - if (role === "开场钩子") return `${base} 正面或半身口播状态,表情有痛点或好奇感,能快速抓住注意。` - if (role === "痛点推进") return `${base} 肩颈紧绷、低头久坐或按揉脖子的状态,重点看清脖子、肩线和上背。` - if (role === "利益证明") return `${base} 产品佩戴或即将佩戴的放松状态,优先肩颈近景、侧面和后颈肩背角度。` - if (role === "方案过渡") return `${base} 手部调整产品或展示佩戴贴合感,人物姿态自然,产品位置不能挡住关键结构。` - if (role === "转化收口") return `${base} 状态稳定、放松、干净收尾,可用正面/三分之二视角或产品佩戴后的稳定状态。` + if (role === "hook") return `${base} 正面或半身口播状态,表情有痛点或好奇感,能快速抓住注意。` + if (role === "pain") return `${base} 肩颈紧绷、低头久坐或按揉脖子的状态,重点看清脖子、肩线和上背。` + if (role === "proof") return `${base} 产品佩戴或即将佩戴的放松状态,优先肩颈近景、侧面和后颈肩背角度。` + if (role === "solution") return `${base} 手部调整产品或展示佩戴贴合感,人物姿态自然,产品位置不能挡住关键结构。` + if (role === "cta") return `${base} 状态稳定、放松、干净收尾,可用正面/三分之二视角或产品佩戴后的稳定状态。` return `${base} 保持与整片一致的主体身份、材质、体型、性别表现和广告气质。` } function buildAudioStoryboardRows(job: Job | null): AudioStoryboardRow[] { if (!job?.transcript.length) return [] return job.transcript.map((segment, index) => { - const source = segment.zh?.trim() || segment.en?.trim() || "原音频文案待补充" + const source = segment.en?.trim() || segment.zh?.trim() || "Source audio script pending." + const sourceZh = segment.zh?.trim() || segment.en?.trim() || "原音频文案待补充" const role = classifyAudioRole(`${segment.en} ${segment.zh}`, index, job.transcript.length) const visualMode = visualModeForRole(role) const defaults = visualModeDefaults(visualMode) + const defaultsZh = visualModeDefaults(visualMode, "zh") + const keyElements = role === "proof" + ? "wearing action, product position, hand pressing the control, relaxed expression" + : "creator framing, subject gesture, facial rhythm, scene lighting" + const keyElementsZh = role === "proof" + ? 
"佩戴动作、产品位置、手部按键、放松表情" + : "口播构图、人物动作、表情节奏、场景光线" return { index: segment.index, start: segment.start, end: segment.end, source, + sourceZh, role, visualMode, needsProduct: defaults.needsProduct, needsSubject: defaults.needsSubject, subjectDescription: buildSubjectDescription(role, visualMode), + subjectDescriptionZh: buildSubjectDescriptionZh(role, visualMode), skgCopy: buildSkgCopy(role, index), + skgCopyZh: buildSkgCopyZh(role, index), visualPlan: buildVisualPlan(role), + visualPlanZh: buildVisualPlanZh(role), firstFramePlan: buildFirstFramePlan(role), + firstFramePlanZh: buildFirstFramePlanZh(role), lastFramePlan: buildLastFramePlan(role), - referencePlan: `从原视频 ${segment.start.toFixed(1)}-${segment.end.toFixed(1)}s 定向抽 1-2 张参考帧。`, - keyElements: role === "利益证明" ? "佩戴动作、产品位置、手部按键、放松表情" : "口播构图、人物动作、表情节奏、场景光线", - productIntegration: "把原片产品/道具语境替换为 SKG 白色 U 形颈部按摩仪,产品必须外置佩戴在肩颈位置。", + lastFramePlanZh: buildLastFramePlanZh(role), + referencePlan: `Extract 1-2 targeted reference frames from source video ${segment.start.toFixed(1)}-${segment.end.toFixed(1)}s.`, + keyElements, + keyElementsZh, + productIntegration: "Replace the source product or prop context with the SKG white U-shaped neck-and-shoulder massager. The product must be worn externally around the neck and shoulders.", + productIntegrationZh: "把原片产品/道具语境替换为 SKG 白色 U 形颈部按摩仪,产品必须外置佩戴在肩颈位置。", productPlacement: defaults.productPlacement, + productPlacementZh: defaultsZh.productPlacement, } }) } @@ -1173,14 +1347,14 @@ function productReferenceNotes(items: ProductRefItem[]) { if (!items.length) return "" return items .map((item, index) => { - const tags = item.useTags.map((tag) => PRODUCT_USE_TAG_LABELS[tag]).filter(Boolean).join("/") + const tags = item.useTags.map((tag) => PRODUCT_USE_TAG_PROMPT_LABELS[tag] ?? tag).filter(Boolean).join(", ") const orientation = formatProductOrientation(item.orientation) - const direction = orientation ? `;方向:${orientation}` : "" - const landmarks = item.landmarks.length ? 
`;结构:${item.landmarks.join("/")}` : "" - const risk = item.risk ? `;风险:${item.risk}` : "" - return `${index + 1}. ${productViewLabel(item.view)}|${productBackgroundLabel(item.background)}|${tags}:${item.note || "无补充备注"}${direction}${landmarks}${risk}` + const direction = orientation ? `; orientation: ${orientation}` : "" + const landmarks = item.landmarks.length ? `; structural landmarks: ${item.landmarks.join(", ")}` : "" + const risk = item.risk ? `; risk: ${item.risk}` : "" + return `${index + 1}. ${PRODUCT_VIEW_PROMPT_LABELS[item.view] ?? item.view} | ${PRODUCT_BACKGROUND_PROMPT_LABELS[item.background] ?? item.background} | ${tags || "general product reference"}: ${item.note || "no extra note"}${direction}${landmarks}${risk}` }) - .join(";") + .join("; ") } function savedScenePatch(scene?: StoryboardScene | null): RowPlanPatch { @@ -1189,11 +1363,11 @@ function savedScenePatch(scene?: StoryboardScene | null): RowPlanPatch { visualMode: scene.visual_mode, needsProduct: scene.needs_product, needsSubject: scene.needs_subject, - subjectDescription: scene.subject?.split("\n").find((line) => line.trim() && !line.startsWith("主体真源") && !line.startsWith("本条不需要"))?.trim(), - visualPlan: scene.scene?.split("\n").find((line) => line.trim() && !line.startsWith("镜头类型") && !line.startsWith("首帧规划") && !line.startsWith("尾帧规划") && !line.startsWith("原音频依据"))?.trim(), + subjectDescription: scene.subject?.split("\n").find((line) => line.trim() && !line.startsWith("Subject source") && !line.startsWith("No main subject") && !line.startsWith("主体真源") && !line.startsWith("本条不需要"))?.trim(), + visualPlan: scene.scene?.split("\n").find((line) => line.trim() && !line.startsWith("Visual mode") && !line.startsWith("First-frame plan") && !line.startsWith("Last-frame plan") && !line.startsWith("Source audio reference") && !line.startsWith("镜头类型") && !line.startsWith("首帧规划") && !line.startsWith("尾帧规划") && !line.startsWith("原音频依据"))?.trim(), firstFramePlan: scene.first_frame_plan, lastFramePlan: 
scene.last_frame_plan, - productIntegration: scene.product?.split("\n").find((line) => line.trim() && !line.startsWith("产品需求") && !line.startsWith("产品出现方式") && !line.startsWith("产品素材池") && !line.startsWith("未上传产品图") && !line.startsWith("本条规划"))?.trim(), + productIntegration: scene.product?.split("\n").find((line) => line.trim() && !line.startsWith("Product requirement") && !line.startsWith("Product placement") && !line.startsWith("Product reference pool") && !line.startsWith("No product") && !line.startsWith("This beat") && !line.startsWith("产品需求") && !line.startsWith("产品出现方式") && !line.startsWith("产品素材池") && !line.startsWith("未上传产品图") && !line.startsWith("本条规划"))?.trim(), productPlacement: scene.product_placement, } } @@ -1206,34 +1380,40 @@ function applyPlanPatch(row: AudioStoryboardRow, patch?: RowPlanPatch): AudioSto needsProduct: patch.needsProduct ?? row.needsProduct, needsSubject: patch.needsSubject ?? row.needsSubject, subjectDescription: patch.subjectDescription ?? row.subjectDescription, + subjectDescriptionZh: patch.subjectDescriptionZh ?? row.subjectDescriptionZh, visualPlan: patch.visualPlan ?? row.visualPlan, + visualPlanZh: patch.visualPlanZh ?? row.visualPlanZh, firstFramePlan: patch.firstFramePlan ?? row.firstFramePlan, + firstFramePlanZh: patch.firstFramePlanZh ?? row.firstFramePlanZh, lastFramePlan: patch.lastFramePlan ?? row.lastFramePlan, + lastFramePlanZh: patch.lastFramePlanZh ?? row.lastFramePlanZh, productIntegration: patch.productIntegration ?? row.productIntegration, + productIntegrationZh: patch.productIntegrationZh ?? row.productIntegrationZh, productPlacement: patch.productPlacement ?? row.productPlacement, + productPlacementZh: patch.productPlacementZh ?? 
row.productPlacementZh, } } function productPriorityForRow(row: AudioStoryboardRow) { - const viewPriorityByRole: Record = { - "开场钩子": ["front", "left_45", "right_45", "side_thickness"], - "痛点推进": ["front", "side_thickness", "left_45", "right_45"], - "利益证明": ["inner_contacts", "side_thickness", "front", "left_45", "right_45", "back_bottom"], - "方案过渡": ["front", "left_45", "right_45", "inner_contacts", "side_thickness"], - "转化收口": ["front", "back_bottom", "left_45", "right_45", "inner_contacts"], - "节奏承接": ["front", "left_45", "right_45", "side_thickness"], + const viewPriorityByRole: Record = { + hook: ["front", "left_45", "right_45", "side_thickness"], + pain: ["front", "side_thickness", "left_45", "right_45"], + proof: ["inner_contacts", "side_thickness", "front", "left_45", "right_45", "back_bottom"], + solution: ["front", "left_45", "right_45", "inner_contacts", "side_thickness"], + cta: ["front", "back_bottom", "left_45", "right_45", "inner_contacts"], + bridge: ["front", "left_45", "right_45", "side_thickness"], } - const tagPriorityByRole: Record = { - "开场钩子": ["hero_packshot", "asymmetry", "side_thickness"], - "痛点推进": ["wearing_scale", "side_thickness", "hero_packshot"], - "利益证明": ["inner_contact", "wearing_scale", "button_detail", "side_thickness"], - "方案过渡": ["wearing_scale", "hero_packshot", "inner_contact"], - "转化收口": ["hero_packshot", "back_bottom", "asymmetry", "material_texture"], - "节奏承接": ["hero_packshot", "asymmetry", "side_thickness"], + const tagPriorityByRole: Record = { + hook: ["hero_packshot", "asymmetry", "side_thickness"], + pain: ["wearing_scale", "side_thickness", "hero_packshot"], + proof: ["inner_contact", "wearing_scale", "button_detail", "side_thickness"], + solution: ["wearing_scale", "hero_packshot", "inner_contact"], + cta: ["hero_packshot", "back_bottom", "asymmetry", "material_texture"], + bridge: ["hero_packshot", "asymmetry", "side_thickness"], } return { - views: viewPriorityByRole[row.role] ?? 
viewPriorityByRole["节奏承接"], - tags: tagPriorityByRole[row.role] ?? tagPriorityByRole["节奏承接"], + views: viewPriorityByRole[row.role] ?? viewPriorityByRole.bridge, + tags: tagPriorityByRole[row.role] ?? tagPriorityByRole.bridge, } } @@ -1245,17 +1425,17 @@ function endpointProductPriority(row: AudioStoryboardRow, role?: "first_frame" | if (!views.includes(view)) views.push(view) if (tag && !tags.includes(tag)) tags.push(tag) } - if (/后颈|肩背|背面|背部|后背|上背|尾帧|佩戴完成|贴合完成/.test(text)) add("back_bottom", "back_bottom") - if (/侧面|侧身|厚度|侧厚|体积|左侧|右侧|45|调整|拿起|靠近肩颈/.test(text)) add("side_thickness", "side_thickness") - if (/内侧|触点|按摩头|贴颈|接触|皮肤接触/.test(text)) add("inner_contacts", "inner_contact") - if (/佩戴比例|上身|真人佩戴|脖子|肩颈|锁骨/.test(text)) add("left_45", "wearing_scale") - if (/按键|按钮|控制|开关|logo/.test(text)) add("right_45", "button_detail") + if (/back neck|neck back|upper back|back view|back side|shoulder blade|last frame|worn|wearing complete|fit complete|后颈|肩背|背面|背部|后背|上背|尾帧|佩戴完成|贴合完成/.test(text)) add("back_bottom", "back_bottom") + if (/side|profile|thickness|volume|left side|right side|45|adjust|pick up|bring.*neck|toward.*shoulder|侧面|侧身|厚度|侧厚|体积|左侧|右侧|调整|拿起|靠近肩颈/.test(text)) add("side_thickness", "side_thickness") + if (/inner|contact pad|massage head|touching skin|neck contact|skin contact|内侧|触点|按摩头|贴颈|接触|皮肤接触/.test(text)) add("inner_contacts", "inner_contact") + if (/wearing scale|upper body|worn on human|neck|shoulder|collarbone|佩戴比例|上身|真人佩戴|脖子|肩颈|锁骨/.test(text)) add("left_45", "wearing_scale") + if (/button|control|switch|logo|按键|按钮|控制|开关/.test(text)) add("right_45", "button_detail") return { views, tags } } function endpointProductMaxForRow(row: AudioStoryboardRow, role?: "first_frame" | "last_frame") { const text = `${row.visualPlan} ${row.firstFramePlan} ${row.lastFramePlan} ${row.productIntegration} ${row.productPlacement} ${role ?? 
""}`.toLowerCase() - return /侧面|侧身|厚度|侧厚|后颈|肩背|背面|背部|内侧|触点|按摩头|贴颈|特写|近景|按键|按钮|佩戴完成|上背/.test(text) + return /side|profile|thickness|back neck|upper back|back view|inner|contact pad|massage head|neck contact|close-up|closeup|button|control|worn|wearing complete|侧面|侧身|厚度|侧厚|后颈|肩背|背面|背部|内侧|触点|按摩头|贴颈|特写|近景|按键|按钮|佩戴完成|上背/.test(text) ? MAX_PRODUCT_REFS_PER_ENDPOINT : 1 } @@ -1336,26 +1516,42 @@ function subjectViewRoleHint(view: string) { return hints[view] ?? "主体参考视角" } +function subjectViewPromptHint(view: string) { + const hints: Record = { + front: "front speaking shot, opening hook, expression, conversion close", + three_quarter_left: "left three-quarter angle, talking, pre-wear motion, natural turn", + three_quarter_right: "right three-quarter angle, talking, pre-wear motion, natural turn", + left: "left side, neck-and-shoulder side profile, wearing action, product thickness and position", + right: "right side, neck-and-shoulder side profile, wearing action, product thickness and position", + back: "back view, back neck and upper shoulders, product placement landing", + bust_front: "front neck-and-shoulder close-up, pain-point expression, wearing scale", + bust_left_45: "left three-quarter neck-and-shoulder close-up, hand adjustment, wearable fit", + bust_right_45: "right three-quarter neck-and-shoulder close-up, hand adjustment, wearable fit", + back_neck_detail: "back-neck and upper-back detail, contact-pad position, product fit", + } + return hints[view] ?? "subject reference view" +} + function subjectDescriptionForRow(row: AudioStoryboardRow, subjectRefs: SubjectPlanningRef[]) { const trimmed = row.subjectDescription.trim() if (trimmed) return trimmed - const labels = subjectRefs.slice(0, 4).map((ref) => ref.label || subjectViewLabel(ref.view)).join("、") + const labels = subjectRefs.slice(0, 4).map((ref) => ref.label || subjectViewLabel(ref.view)).join(", ") return [ - "统一相似主体:使用已生成的主体视图作为人物真源,保持同一人物身份、体型、材质、年龄段、性别表现和广告气质。", - labels ? 
`可用主体视角:${labels}。` : "", - "如果本条需要人物但缺少更具体描述,默认保持透明皮肤包裹白色骨架、非恐怖、肩颈区域清晰可佩戴产品。", + "Consistent similar subject: use the generated subject view pack as the character truth, maintaining one identity, body proportion, material, age range, gender presentation, and commercial mood.", + labels ? `Available subject views: ${labels}.` : "", + "If this beat needs a subject but lacks a specific description, default to a friendly transparent skin shell with visible white skeleton, non-horror, with clear neck and shoulder area for wearable product placement.", ].filter(Boolean).join("") } function subjectPriorityForRow(row: AudioStoryboardRow, role?: "first_frame" | "last_frame") { const text = `${row.role} ${row.visualMode} ${row.subjectDescription} ${row.visualPlan} ${row.firstFramePlan} ${row.lastFramePlan} ${row.productPlacement}`.toLowerCase() - if (/后颈|肩背|上背|背面|背部|贴合|佩戴完成|已正确佩戴/.test(text)) { + if (/back neck|upper back|shoulder blade|back view|fit|worn|wearing complete|correctly worn|后颈|肩背|上背|背面|背部|贴合|佩戴完成|已正确佩戴/.test(text)) { return ["back_neck_detail", "back", "bust_left_45", "bust_right_45", "left", "right", "bust_front", "three_quarter_left", "three_quarter_right", "front"] } - if (/侧面|左侧|右侧|45|调整|拿起|准备佩戴|靠近肩颈|手部/.test(text)) { + if (/side|left|right|45|adjust|pick up|prepare to wear|toward.*neck|hand|侧面|左侧|右侧|调整|拿起|准备佩戴|靠近肩颈|手部/.test(text)) { return ["bust_left_45", "bust_right_45", "left", "right", "three_quarter_left", "three_quarter_right", "bust_front", "front", "back_neck_detail", "back"] } - if (/近景|半身|肩颈|锁骨|脖子|揉脖子|低头|紧绷/.test(text)) { + if (/close-up|closeup|upper-body|bust|neck|shoulder|collarbone|rubbing.*neck|looking down|tense|tension|近景|半身|肩颈|锁骨|脖子|揉脖子|低头|紧绷/.test(text)) { return ["bust_front", "bust_left_45", "bust_right_45", "front", "three_quarter_left", "three_quarter_right", "left", "right", "back_neck_detail", "back"] } if (role === "last_frame" && row.needsProduct) { @@ -1371,8 +1567,8 @@ function selectSubjectRefsForRow(row: AudioStoryboardRow, 
refs: SubjectPlanningR .map((ref, index) => { const rank = priority.indexOf(ref.view) const labelText = `${ref.label || ""} ${ref.roleHint}`.toLowerCase() - const closeupScore = /肩颈|后颈|近景|贴合|佩戴/.test(row.visualPlan + row.firstFramePlan + row.lastFramePlan + row.productPlacement) - && /bust|neck|近景|肩颈|后颈/.test(`${ref.view} ${labelText}`) + const closeupScore = /neck|shoulder|back neck|close-up|closeup|fit|wear|佩戴|肩颈|后颈|近景|贴合/.test(row.visualPlan + row.firstFramePlan + row.lastFramePlan + row.productPlacement) + && /bust|neck|close-up|closeup|近景|肩颈|后颈/.test(`${ref.view} ${labelText}`) ? 12 : 0 return { ref, score: (rank >= 0 ? 100 - rank * 8 : 0) + closeupScore - index } @@ -1383,7 +1579,7 @@ function selectSubjectRefsForRow(row: AudioStoryboardRow, refs: SubjectPlanningR } function subjectReferenceNotes(refs: SubjectPlanningRef[]) { - return refs.map((ref, index) => `${index + 1}. ${ref.label || subjectViewLabel(ref.view)}|${ref.roleHint}`).join(";") + return refs.map((ref, index) => `${index + 1}. ${ref.label || subjectViewLabel(ref.view)} | ${subjectViewPromptHint(ref.view)}`).join("; ") } function subjectAssetRefsForPlanning(source: { frame: KeyFrame; element: KeyElement } | null): SubjectPlanningRef[] { @@ -1432,19 +1628,20 @@ function buildEndpointFramePrompt(row: AudioStoryboardRow, role: "first_frame" | const opposite = role === "first_frame" ? row.lastFramePlan : row.firstFramePlan const productNotes = selectedProductItems.length ? productReferenceNotes(selectedProductItems) : "" return [ - `分镜 ${row.index + 1} ${role === "first_frame" ? "首帧" : "尾帧"}。`, - `新口播文案:${row.skgCopy}`, - `镜头类型:${VISUAL_MODE_OPTIONS.find((item) => item.value === row.visualMode)?.label ?? row.visualMode}。`, - `当前要生成的画面:${target}`, - `另一端画面用于连续性参考:${opposite}`, - `画面规划:${row.visualPlan}`, + `Storyboard beat ${row.index + 1}, ${role === "first_frame" ? 
"first frame" : "last frame"}.`, + `New English voice-over line: ${row.skgCopy}`, + `Narrative role: ${ROLE_LABELS_EN[row.role]}.`, + `Visual mode: ${row.visualMode}.`, + `Target endpoint frame to generate now: ${target}`, + `Opposite endpoint continuity reference: ${opposite}`, + `Overall visual plan: ${row.visualPlan}`, row.needsSubject - ? `人物主体 brief:${subjectBrief || "主体 brief 暂缺,请保持一个统一的商业广告主体,肩颈区域清晰可佩戴产品。"}。主体只依据这段文字身份描述,不上传主体参考图;可以根据本镜头自由改变动作、景别、表情和环境,但不能换成另一个人设。不要回到原视频关键帧复刻人物。` - : "本条不需要主角人物;如出现人物,只能是局部手部、背影或环境人物,不要生成透明骨架主角。", + ? `Subject identity brief: ${subjectBrief || "Subject brief is missing. Keep one unified commercial ad subject with clear neck-and-shoulder area for product placement."}. Use only this text identity brief; no subject reference image is uploaded. The subject may freely change pose, framing, expression, gesture, and environment for this shot, but must not become a different character. Do not copy the original source-video person or keyframe.` + : "This beat does not need a main character. If people appear, they should only be partial hands, back-view background figures, or environmental figures; do not generate the transparent skeleton main subject.", row.needsProduct - ? `产品融入:${row.productPlacement}。${row.productIntegration}。本次只提供 ${selectedProductItems.length} 张同一 SKG 肩颈按摩仪产品硬参考;${productNotes}。产品是套在脖子上的 U 形肩颈按摩仪,必须保持真实佩戴大小、左右非对称、按键、触点、厚度和贴颈位置。` - : "本条不露出产品,不要强行生成 SKG 产品、包装、白底图或随机商品。", - "输出一张单独的 9:16 高清首/尾帧,不要拼图,不要字幕,不要平台 UI,不要水印。画面要能作为后续视频生成的明确起止帧。", + ? `Product integration: ${row.productPlacement}. ${row.productIntegration}. This request provides ${selectedProductItems.length} rigid reference image(s) of the same SKG neck-and-shoulder massager: ${productNotes}. The product is a U-shaped wearable device worn around the neck and shoulders. Preserve realistic wearable scale, left-right asymmetry, button placement, contact pads, side thickness, and neck-contact position.` + : "Do not show the product in this beat. 
Do not force-generate an SKG product, package, white-background product image, or random merchandise.", + "Output one single 9:16 high-definition endpoint frame. No contact sheet, no multiple views, no subtitles, no platform UI, no watermark. The image must work as a clear first/last frame for downstream video generation.", ].join("\n") } @@ -1462,10 +1659,10 @@ function buildStoryboardSceneFromAudioRow( const subjectNotes = subjectReferenceNotes(subjectRefs) const subjectBrief = subjectBriefForEndpoint(row, subjectRefs) const productGuidance = !row.needsProduct - ? "本条规划为不露出产品或不把产品作为画面主体;视频生成时不要硬插 SKG 产品、包装、白底图或错误商品。" + ? "This beat is planned without product visibility or without product as the visual subject. Do not force-insert an SKG product, package, white-background product render, or incorrect merchandise during video generation." : productItems.length - ? `产品素材池共有 ${productItems.length} 张,本条只选用 ${selectedProductItems.length} 张最相关参考图,不要把未选素材混入本条画面。产品硬定义:这是套在脖子上的 U 形肩颈按摩仪,不是耳机、头戴设备或护颈枕。坐标系硬规则:左/右按佩戴者身体左右,不能按图片左右;上=靠近下巴/脸/颈部上沿,下=靠近锁骨/肩部下沿;内侧=贴颈皮肤/按摩触点,外侧=外壳/按键/Logo。所选图片只作为产品结构、角度、比例和细节参考,不要照搬参考图的白底/黑底/棚拍背景。视角标注:${notes}。保留左右非对称细节,不要把两边做成镜像对称;肩颈产品大小必须贴近真实佩戴比例,不能缩成耳机,也不能放大成护颈枕。` - : "未上传产品图时使用默认 SKG 产品图;生成前建议先建立同一产品素材池,锁定左右差异、厚度和佩戴比例。" + ? `The product pool has ${productItems.length} image(s); this beat selects only the ${selectedProductItems.length} most relevant reference image(s). Do not mix unselected assets into this shot. Rigid product definition: this is a U-shaped neck-and-shoulder wearable massager, not headphones, a headset, or a neck pillow. Coordinate rule: left/right refer to the wearer's body, not the image; top means closer to chin/face/upper neck, bottom means closer to collarbone/shoulders; inner means skin-contact side and massage pads, outer means shell/buttons/logo. Selected images are only product structure, angle, scale, and detail references; do not copy the white/black/studio background. View notes: ${notes}. 
Preserve left-right asymmetry; do not mirror the two sides. The shoulder-neck product size must match realistic wearing scale, not earphone-small and not neck-pillow-large.` + : "No product images are uploaded. Use the default SKG product concept only if needed, and preferably establish a same-product pool before generation to lock left-right differences, thickness, and wearing scale." return { duration: Number(Math.max(3.2, Math.min(6.5, row.end - row.start || 4.5)).toFixed(1)), first_image: endpointRefs.firstImage ?? null, @@ -1482,11 +1679,11 @@ function buildStoryboardSceneFromAudioRow( subject_images: row.needsSubject ? subjectRefs : [], subject_image: row.needsSubject ? subjectRefs[0] ?? null : null, subject: row.needsSubject - ? `${subjectDescription}\n主体动作/画面要素:${row.keyElements}\n主体真源:从已生成的相似主体白底视图中按本镜头需求选择 ${subjectRefs.length} 张;${subjectNotes}。关键帧只用于前置主体提取,不作为后续视频首尾帧参考。` - : "本条不需要人物主体或相似主体参考;如画面里出现人物,只作为背景或局部,不作为主角。", - scene: `镜头类型:${VISUAL_MODE_OPTIONS.find((item) => item.value === row.visualMode)?.label ?? row.visualMode}\n${row.visualPlan}\n首帧规划:${row.firstFramePlan}\n尾帧规划:${row.lastFramePlan}\n原音频依据:${row.source}`, - product: `产品需求:${row.needsProduct ? "需要产品参考" : "本条不需要产品"}\n产品出现方式:${row.productPlacement}\n${row.needsProduct ? row.productIntegration : "本条以情绪、人物状态、空间或节奏过渡为主,不露出产品。"}\n${productGuidance}`, - action: `${row.skgCopy}\n连续动作:从首帧规划自然过渡到尾帧规划,镜头类型和产品/人物需求不能中途改变。`, + ? `${subjectDescription}\nSubject action and visual elements: ${row.keyElements}\nSubject source: select ${subjectRefs.length} generated similar-subject view(s) according to this shot's need; ${subjectNotes}. Source keyframes are only used for upstream subject extraction and must not be used as direct endpoint-frame references.` + : "No main character or similar-subject reference is needed for this beat. 
If people appear, they should be background or partial-body context, not the main subject.", + scene: `Visual mode: ${row.visualMode}\n${row.visualPlan}\nFirst-frame plan: ${row.firstFramePlan}\nLast-frame plan: ${row.lastFramePlan}\nSource audio reference: ${row.source}`, + product: `Product requirement: ${row.needsProduct ? "product reference required" : "no product required for this beat"}\nProduct placement: ${row.productPlacement}\n${row.needsProduct ? row.productIntegration : "This beat focuses on emotion, subject state, space, or pacing transition and should not show the product."}\n${productGuidance}`, + action: `${row.skgCopy}\nContinuity action: transition naturally from the first-frame plan to the last-frame plan. The visual mode and product/subject requirements must not change mid-clip.`, reference_ids: [], } } @@ -3103,8 +3300,10 @@ function AudioStoryboardPlanPanel({ const [productAnalyzing, setProductAnalyzing] = useState(false) const [productAngleBusy, setProductAngleBusy] = useState(null) const [copyOverrides, setCopyOverrides] = useState>({}) + const [copyZhOverrides, setCopyZhOverrides] = useState>({}) const [planOverrides, setPlanOverrides] = useState>({}) const [authorIntent, setAuthorIntent] = useState("") + const [showChineseMirror, setShowChineseMirror] = useState(true) const [scriptRewriteBusy, setScriptRewriteBusy] = useState<"all" | number | null>(null) const productFileRef = useRef(null) const productPersistSeq = useRef(0) @@ -3146,6 +3345,7 @@ function AudioStoryboardPlanPanel({ } const copyForRow = (row: AudioStoryboardRow) => copyOverrides[row.index] ?? row.skgCopy + const copyZhForRow = (row: AudioStoryboardRow) => copyZhOverrides[row.index] ?? row.skgCopyZh const patchRowCopy = (rowIndex: number, value: string) => { setCopyOverrides((prev) => ({ ...prev, [rowIndex]: value })) @@ -3163,7 +3363,9 @@ function AudioStoryboardPlanPanel({ needsProduct: defaults.needsProduct, needsSubject: defaults.needsSubject, subjectDescription: row ? 
buildSubjectDescription(row.role, mode) : "", + subjectDescriptionZh: row ? buildSubjectDescriptionZh(row.role, mode) : "", productPlacement: defaults.productPlacement, + productPlacementZh: visualModeDefaults(mode, "zh").productPlacement, }) } @@ -3328,7 +3530,7 @@ function AudioStoryboardPlanPanel({ await analyzeAndCompleteProductViews(productItems.map((item) => item.ref)) } - const applyScriptRewriteItems = (items: Array<{ index: number; text: string }>) => { + const applyScriptRewriteItems = (items: Array<{ index: number; text: string; text_zh?: string }>) => { if (!items.length) return setCopyOverrides((prev) => { const next = { ...prev } @@ -3337,6 +3539,13 @@ function AudioStoryboardPlanPanel({ } return next }) + setCopyZhOverrides((prev) => { + const next = { ...prev } + for (const item of items) { + if (item.text_zh?.trim()) next[item.index] = item.text_zh.trim() + } + return next + }) } const rewriteSingleRow = async (row: AudioStoryboardRow) => { @@ -3426,13 +3635,16 @@ function AudioStoryboardPlanPanel({ setEndpointFrameBusy(busyKey) try { await saveRowStoryboardDraft(plannedRow, frame) + const rawPrompt = buildEndpointFramePrompt(plannedRow, role, selectedProductItems, subjectBrief) + const prompt = await ensureEnglishForModel(rawPrompt) + const englishSubjectBrief = await ensureEnglishForModel(subjectBrief) const updated = await generateSceneAsset(job.id, frame.index, { size: SUBJECT_ASSET_SIZE, scene_mode: "similar", scene_style: "premium_product", asset_role: role, - prompt: buildEndpointFramePrompt(plannedRow, role, selectedProductItems, subjectBrief), - subject_brief: subjectBrief, + prompt, + subject_brief: englishSubjectBrief, product_images: selectedProductItems.map((item) => item.ref), source_frame_indices: [], }) @@ -3622,6 +3834,13 @@ function AudioStoryboardPlanPanel({ />
+