From 35fc0883756dca66c944e0d8e99c72f1540b24d6 Mon Sep 17 00:00:00 2001 From: kang Date: Wed, 20 May 2026 12:51:02 +0800 Subject: [PATCH] feat: add subject image agent workflow --- .memory/worklog.json | 165 +++--- api/main.py | 263 ++++++++- docs/source-analysis.html | 33 +- web/components/ad-recreation-board.tsx | 704 +++++++++++++++---------- web/lib/api.ts | 78 +++ 5 files changed, 873 insertions(+), 370 deletions(-) diff --git a/.memory/worklog.json b/.memory/worklog.json index 1bae706..a4cd800 100644 --- a/.memory/worklog.json +++ b/.memory/worklog.json @@ -1,85 +1,5 @@ { "entries": [ - { - "files_changed": 1, - "message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 1 项未提交变更 · 最近提交:feat: optimize product pool uploads", - "ts": "2026-05-17T09:48:27Z", - "type": "session-heartbeat" - }, - { - "files_changed": 1, - "message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 1 项未提交变更 · 最近提交:feat: optimize product pool uploads", - "ts": "2026-05-17T09:58:27Z", - "type": "session-heartbeat" - }, - { - "files_changed": 1, - "message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 1 项未提交变更 · 最近提交:feat: optimize product pool uploads", - "ts": "2026-05-17T10:08:27Z", - "type": "session-heartbeat" - }, - { - "files_changed": 1, - "message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 1 项未提交变更 · 最近提交:feat: optimize product pool uploads", - "ts": "2026-05-17T10:18:27Z", - "type": "session-heartbeat" - }, - { - "files_changed": 1, - "message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 1 项未提交变更 · 最近提交:feat: optimize product pool uploads", - "ts": "2026-05-17T10:28:28Z", - "type": "session-heartbeat" - }, - { - "files_changed": 1, - "message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 1 项未提交变更 · 最近提交:feat: optimize product pool uploads", - "ts": "2026-05-17T10:38:28Z", - "type": "session-heartbeat" - }, - { - "files_changed": 1, - "message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 1 项未提交变更 · 最近提交:feat: optimize product pool uploads", - "ts": "2026-05-17T10:48:28Z", - "type": "session-heartbeat" - }, - { - "files_changed": 1, - "message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 1 项未提交变更 · 最近提交:feat: optimize product pool uploads", - "ts": "2026-05-17T10:58:28Z", - "type": "session-heartbeat" - }, - { - "files_changed": 1, - "message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 1 项未提交变更 · 最近提交:feat: optimize product pool uploads", - "ts": "2026-05-17T11:08:28Z", - "type": "session-heartbeat" - }, - { - "files_changed": 1, - "message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 1 项未提交变更 · 最近提交:feat: optimize product pool uploads", - "ts": "2026-05-17T11:18:28Z", - "type": "session-heartbeat" - }, - { - "files_changed": 2, - "hash": "a9d5962", - "message": "fix: tolerate product view model output", - "ts": "2026-05-17T19:24:23+08:00", - "type": "commit" - }, - { - "files_changed": 1, - "message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 1 项未提交变更 · 最近提交:fix: tolerate product view model output", - "ts": "2026-05-17T11:28:28Z", - "type": "session-heartbeat" - }, - { - "files_changed": 4, - "hash": "96c998c", - "message": "auto-save 2026-05-17 19:32 (~4)", - "ts": "2026-05-17T19:32:19+08:00", - "type": "commit" - }, { "files_changed": 4, "hash": "5c6a16d", @@ -3247,6 +3167,91 @@ "type": "session-heartbeat", "message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 2 项未提交变更 · 最近提交:fix: isolate subject reference generation", "files_changed": 2 + }, + { + "ts": "2026-05-20T11:43:57+08:00", + "type": "commit", + "message": "auto-save 2026-05-20 11:43 (~2)", + "hash": "5f37dd9", + "files_changed": 2 + }, + { + "ts": "2026-05-20T11:46:04+08:00", + "type": "commit", + "message": "docs: record subject reference deployment", + "hash": "3d198b0", + "files_changed": 1 + }, + { + "ts": "2026-05-20T03:53:57Z", + "type": "session-heartbeat", + "message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 1 项未提交变更 · 最近提交:docs: record subject reference deployment", + "files_changed": 1 + }, + { + "ts": "2026-05-20T04:03:57Z", + "type": "session-heartbeat", + "message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 1 项未提交变更 · 最近提交:docs: record subject reference deployment", + "files_changed": 1 + }, + { + "ts": "2026-05-20T04:13:57Z", + "type": "session-heartbeat", + "message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 1 项未提交变更 · 最近提交:docs: record subject reference deployment", + "files_changed": 1 + }, + { + "ts": "2026-05-20T04:23:57Z", + "type": "session-heartbeat", + "message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 1 项未提交变更 · 最近提交:docs: record subject reference deployment", + "files_changed": 1 + }, + { + "ts": "2026-05-20T12:27:41+08:00", + "type": "commit", + "message": "auto-save 2026-05-20 12:27 (~2)", + "hash": "935fa6c", + "files_changed": 2 + }, + { + "ts": "2026-05-20T12:33:07+08:00", + "type": "commit", + "message": "auto-save 2026-05-20 12:33 (~4)", + "hash": "890460f", + "files_changed": 4 + }, + { + "ts": "2026-05-20T04:33:57Z", + "type": "session-heartbeat", + "message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 2 项未提交变更 · 最近提交:auto-save 2026-05-20 12:33 (~4)", + "files_changed": 2 + }, + { + "ts": "2026-05-20T12:38:32+08:00", + "type": "commit", + "message": "auto-save 2026-05-20 12:38 (~2)", + "hash": "b01dc36", + "files_changed": 2 + }, + { + "ts": "2026-05-20T04:43:57Z", + "type": "session-heartbeat", + "message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 3 项未提交变更 · 最近提交:auto-save 2026-05-20 12:38 (~2)", + "files_changed": 3 + }, + { + "ts": "2026-05-20T12:43:59+08:00", + "type": "commit", + "message": "auto-save 2026-05-20 12:43 (~3)", + "hash": "799b354", + "files_changed": 3 + }, + { + "ts": "2026-05-20T12:49:25+08:00", + "type": "commit", + "message": "auto-save 2026-05-20 12:49 (~2)", + "hash": "fa2e813", + "files_changed": 2 } ] } diff --git a/api/main.py b/api/main.py index 3d39c96..854ebe7 100644 --- a/api/main.py +++ b/api/main.py @@ -106,6 +106,9 @@ IMAGE_FALLBACK_ENABLED = os.getenv("IMAGE_FALLBACK_ENABLED", "true").strip().low IMAGE_MODEL = GPT_IMAGE_MODEL PRODUCT_VIEW_MODEL = GPT_IMAGE_MODEL SUBJECT_ASSET_IMAGE_MODEL = GPT_IMAGE_MODEL +SubjectModelBundle = Literal["gpt", "gemini"] +SUBJECT_AGENT_GPT_MODEL = gpt_model_env("SUBJECT_AGENT_GPT_MODEL", VISION_MODEL) +SUBJECT_AGENT_GEMINI_MODEL = os.getenv("SUBJECT_AGENT_GEMINI_MODEL", "gemini-2.5-flash").strip() or "gemini-2.5-flash" SUBJECT_ASSET_IMAGE_MODELS = [GPT_IMAGE_MODEL] + ( [IMAGE_FALLBACK_MODEL] if IMAGE_FALLBACK_ENABLED and IMAGE_FALLBACK_MODEL and IMAGE_FALLBACK_MODEL != GPT_IMAGE_MODEL else [] ) @@ -734,6 +737,39 @@ class AudioScript(BaseModel): created_at: float = 0.0 +class SubjectAgentAnalysis(BaseModel): + model_bundle: SubjectModelBundle = "gpt" + model: str = "" + source_frame_indices: list[int] = Field(default_factory=list) + summary_zh: str = "" + summary_en: str = "" + generation_brief_en: str = "" + trait_chips: list[str] = Field(default_factory=list) + mode_options: list[str] = Field(default_factory=list) + questions: list[str] = Field(default_factory=list) + warnings: list[str] = Field(default_factory=list) + created_at: float = 0.0 + + +class SubjectAgentMessage(BaseModel): + role: Literal["user", "assistant"] = "assistant" + content: str = "" + created_at: float = 0.0 + + +class SubjectAgentState(BaseModel): + model_bundle: SubjectModelBundle = "gpt" + source_frame_indices: list[int] = Field(default_factory=list) + analysis: SubjectAgentAnalysis | None = None + messages: list[SubjectAgentMessage] = Field(default_factory=list) + selected_mode: Literal["realistic", "cartoon", "elements", "custom"] = "custom" + selected_traits: list[str] = Field(default_factory=list) + requirements_zh: str = "" + generation_prompt_en: str = "" + quantity: int = 6 + updated_at: float = 0.0 + + class Job(BaseModel): id: str url: str @@ -751,6 +787,7 @@ class Job(BaseModel): storyboard_images: list[StoryboardImage] = Field(default_factory=list) generated_videos: list[GeneratedVideo] = Field(default_factory=list) product_refs: list[dict] = Field(default_factory=list) + subject_agent: SubjectAgentState = Field(default_factory=SubjectAgentState) error: str = "" @@ -3892,7 +3929,7 @@ def _image_path_to_data_url(path: Path) -> str: return f"data:{media_type};base64,{base64.b64encode(path.read_bytes()).decode('ascii')}" -def _vision_brief_from_images(image_paths: list[Path], prompt: str, max_images: int = 8) -> str: +def _vision_brief_from_images(image_paths: list[Path], prompt: str, max_images: int = 8, model: str | None = None) -> str: paths = [path for path in image_paths if path.exists()][:max_images] if not paths: return "" @@ -3903,7 +3940,7 @@ def _vision_brief_from_images(image_paths: list[Path], prompt: str, max_images: content.append({"type": "image_url", "image_url": {"url": _image_path_to_data_url(path)}}) try: resp = llm().chat.completions.create( - model=VISION_MODEL, + model=model or VISION_MODEL, messages=[{"role": "user", "content": content}], response_format={"type": "json_object"}, temperature=0.1, @@ -3977,12 +4014,170 @@ def _describe_subject_consensus_from_images(name: str, subject_style: str, image return _vision_brief_from_images(image_paths, prompt, max_images=10) +def _subject_agent_model(bundle: SubjectModelBundle) -> str: + return SUBJECT_AGENT_GEMINI_MODEL if bundle == "gemini" else SUBJECT_AGENT_GPT_MODEL + + +def _subject_agent_image_model(bundle: SubjectModelBundle) -> str: + return IMAGE_FALLBACK_MODEL if bundle == "gemini" and IMAGE_FALLBACK_MODEL else GPT_IMAGE_MODEL + + +def _list_of_strings(value, limit: int = 18) -> list[str]: + if isinstance(value, list): + return [str(item).strip()[:80] for item in value if str(item).strip()][:limit] + if isinstance(value, str) and value.strip(): + return [part.strip()[:80] for part in re.split(r"[,,;;\n]", value) if part.strip()][:limit] + return [] + + +def _subject_agent_json_from_images(job_id: str, source_indices: list[int], bundle: SubjectModelBundle) -> dict: + paths = [_source_frame_path(job_id, idx) for idx in source_indices] + paths = [path for path in paths if path.exists()][:8] + if not paths or not LLM_API_KEY: + return {} + prompt = ( + "You are the image-generation requirements agent for an SKG ad-subject reconstruction workspace. " + "Only analyze the attached reference images for future subject pack generation. Do not discuss video, audio, copywriting, download, or unrelated tasks. " + "The user may later choose whether to preserve the visible subject, preserve only the creative concept with a new person, mix selected elements, or create from a new description. " + "Output strict JSON only with these keys: summary_zh, summary_en, generation_brief_en, trait_chips, mode_options, questions, warnings. " + "summary_zh: 2-4 concise Chinese sentences describing visible subject, concept, outfit/material, camera usefulness. " + "summary_en and generation_brief_en: English only. generation_brief_en is a direct image-generation brief that preserves useful traits while avoiding copyrighted/identifying replication unless user explicitly selects source-locked mode. " + "trait_chips: 8-18 short Chinese selectable traits. Include identity category, anatomy/material, clothing, color, style, framing, and useful negative constraints. " + "mode_options: short Chinese labels for likely choices. questions: 2-4 Chinese questions to clarify generation. warnings: Chinese notes about identity/copyright/consistency risk." + ) + content: list[dict] = [{"type": "text", "text": prompt}] + for path in paths: + content.append({"type": "image_url", "image_url": {"url": _image_path_to_data_url(path)}}) + try: + resp = llm().chat.completions.create( + model=_subject_agent_model(bundle), + messages=[{"role": "user", "content": content}], + response_format={"type": "json_object"}, + temperature=0.15, + max_tokens=1600, + ) + raw = (resp.choices[0].message.content or "").strip() + if not raw: + raw = (getattr(resp.choices[0].message, "reasoning_content", "") or "").strip() + match = re.search(r"\{[\s\S]*\}", raw) + raw = match.group(0) if match else raw + data = json.loads(raw) + return data if isinstance(data, dict) else {} + except Exception as e: + print(f"[subject agent analyze failed] bundle={bundle} error={e}", flush=True) + return {} + + +def _subject_agent_analysis(job_id: str, source_indices: list[int], bundle: SubjectModelBundle) -> SubjectAgentAnalysis: + clean_indices = list(dict.fromkeys(int(idx) for idx in source_indices if isinstance(idx, int) or str(idx).isdigit()))[:8] + model = _subject_agent_model(bundle) + data = _subject_agent_json_from_images(job_id, clean_indices, bundle) + brief_en = _ensure_english(str(data.get("generation_brief_en") or data.get("summary_en") or "").strip()) if data else "" + if not data: + data = { + "summary_zh": "已接收参考帧,但模型没有返回可用结构化分析。你仍可以在下方描述要保留或改变的主体元素。", + "summary_en": "Reference frames were received, but no structured analysis was returned.", + "generation_brief_en": "Use the selected reference frames as visual evidence for a new consistent SKG ad subject pack. Keep neck and shoulder readability clear.", + "trait_chips": ["同一主体", "服装统一", "肩颈清晰", "白底", "六视图"], + "mode_options": ["形象锁定", "创意复刻", "元素混合", "自主描述"], + "questions": ["你要保留原主体外形,还是只保留创意模式?", "是否需要改变人物年龄、性别、服装或风格?"], + "warnings": ["模型分析失败时请用文字补充关键要求。"], + } + brief_en = str(data["generation_brief_en"]) + return SubjectAgentAnalysis( + model_bundle=bundle, + model=model, + source_frame_indices=clean_indices, + summary_zh=str(data.get("summary_zh") or "").strip()[:1800], + summary_en=str(data.get("summary_en") or "").strip()[:1800], + generation_brief_en=brief_en[:2200], + trait_chips=_list_of_strings(data.get("trait_chips"), 24), + mode_options=_list_of_strings(data.get("mode_options"), 8), + questions=_list_of_strings(data.get("questions"), 8), + warnings=_list_of_strings(data.get("warnings"), 8), + created_at=time.time(), + ) + + +def _subject_agent_message_update(state: SubjectAgentState, user_message: str) -> tuple[str, str, str, int, list[str]]: + current_req = state.requirements_zh.strip() + selected_traits = state.selected_traits[:20] + quantity = max(1, min(10, int(state.quantity or 6))) + qty_match = re.search(r"(\d{1,2})\s*张", user_message) + if qty_match: + quantity = max(1, min(10, int(qty_match.group(1)))) + fallback_req = ";".join(part for part in [current_req, user_message.strip()] if part).strip(";") + fallback_prompt = _ensure_english( + "Subject image generation requirements: " + + (fallback_req or "create a consistent SKG ad subject pack") + + ". Keep one identity and one outfit bible across all generated views. " + + (f"Selected traits: {', '.join(selected_traits)}." if selected_traits else "") + ) + if not LLM_API_KEY: + return "已记录这条生图要求。继续补充要保留/删除的元素,确认后我会按当前要求生成。", fallback_req, fallback_prompt, quantity, selected_traits + system = ( + "You are an SKG subject image-generation requirements agent. Your scope is only image generation for a subject view pack. " + "Do not answer unrelated video, audio, download, coding, copywriting, or general chat requests; redirect to subject image requirements. " + "Normalize the user's fuzzy Chinese request into precise generation constraints. " + "Return strict JSON with keys: assistant_message_zh, updated_requirements_zh, generation_prompt_en, quantity, selected_traits. " + "generation_prompt_en must be English and must enforce: one consistent identity, one consistent outfit bible, neck/shoulder readability, no text/watermarks/UI, and legal-safe reconstruction." + ) + user_payload = { + "analysis": state.analysis.model_dump() if state.analysis else None, + "current_requirements_zh": current_req, + "current_generation_prompt_en": state.generation_prompt_en, + "current_quantity": quantity, + "selected_mode": state.selected_mode, + "selected_traits": selected_traits, + "recent_messages": [m.model_dump() for m in state.messages[-8:]], + "user_message": user_message, + } + try: + resp = llm().chat.completions.create( + model=_subject_agent_model(state.model_bundle), + messages=[ + {"role": "system", "content": system}, + {"role": "user", "content": json.dumps(user_payload, ensure_ascii=False)}, + ], + response_format={"type": "json_object"}, + temperature=0.2, + max_tokens=1200, + ) + raw = (resp.choices[0].message.content or "").strip() + match = re.search(r"\{[\s\S]*\}", raw) + data = json.loads(match.group(0) if match else raw) + assistant = str(data.get("assistant_message_zh") or "已记录这条生图要求。").strip()[:1200] + updated_req = str(data.get("updated_requirements_zh") or fallback_req).strip()[:2200] + prompt_en = _ensure_english(str(data.get("generation_prompt_en") or fallback_prompt).strip())[:2600] + out_quantity = max(1, min(10, int(data.get("quantity") or quantity))) + out_traits = _list_of_strings(data.get("selected_traits"), 24) or selected_traits + return assistant, updated_req, prompt_en, out_quantity, out_traits + except Exception as e: + print(f"[subject agent message failed] bundle={state.model_bundle} error={e}", flush=True) + return "已先按本地规则记录这条要求;模型回复失败时仍可直接生成。", fallback_req, fallback_prompt, quantity, selected_traits + + # ---------- API 路由 ---------- class CreateJobReq(BaseModel): url: str +class SubjectAgentAnalyzeReq(BaseModel): + model_bundle: SubjectModelBundle = "gpt" + source_frame_indices: list[int] = Field(default_factory=list) + + +class SubjectAgentMessageReq(BaseModel): + model_bundle: SubjectModelBundle = "gpt" + source_frame_indices: list[int] = Field(default_factory=list) + selected_mode: Literal["realistic", "cartoon", "elements", "custom"] = "custom" + selected_traits: list[str] = Field(default_factory=list) + requirements_zh: str = "" + message: str = "" + quantity: int = 6 + + class TranslateReq(BaseModel): text: str target: Literal["en", "zh"] = "en" @@ -4451,6 +4646,70 @@ def get_job(job_id: str) -> Job: return job_with_artifacts(job) +@app.post("/jobs/{job_id}/subject-agent/analyze", response_model=Job) +def analyze_subject_agent(job_id: str, req: SubjectAgentAnalyzeReq) -> Job: + job = JOBS.get(job_id) + if not job: + raise HTTPException(404, "job not found") + source_indices = [idx for idx in req.source_frame_indices if any(frame.index == idx for frame in job.frames)][:8] + if not source_indices: + raise HTTPException(400, "source_frame_indices required") + analysis = _subject_agent_analysis(job_id, source_indices, req.model_bundle) + state = job.subject_agent.model_copy(deep=True) + assistant_text = ( + f"我已用 {req.model_bundle.upper()} 套件分析这些参考帧。" + "你可以选择形象锁定、创意复刻、元素混合或自主描述,也可以继续告诉我要改数量、风格、服装、人物大小。" + ) + messages = (state.messages + [SubjectAgentMessage(role="assistant", content=assistant_text, created_at=time.time())])[-30:] + state = state.model_copy(update={ + "model_bundle": req.model_bundle, + "source_frame_indices": source_indices, + "analysis": analysis, + "messages": messages, + "generation_prompt_en": analysis.generation_brief_en, + "selected_traits": analysis.trait_chips[:6], + "updated_at": time.time(), + }) + update(job, subject_agent=state, message="转换层分析完成") + return job_with_artifacts(job) + + +@app.post("/jobs/{job_id}/subject-agent/message", response_model=Job) +def message_subject_agent(job_id: str, req: SubjectAgentMessageReq) -> Job: + job = JOBS.get(job_id) + if not job: + raise HTTPException(404, "job not found") + state = job.subject_agent.model_copy(deep=True) + source_indices = [idx for idx in req.source_frame_indices if any(frame.index == idx for frame in job.frames)][:8] + state = state.model_copy(update={ + "model_bundle": req.model_bundle, + "source_frame_indices": source_indices or state.source_frame_indices, + "selected_mode": req.selected_mode, + "selected_traits": [str(item).strip()[:80] for item in req.selected_traits if str(item).strip()][:24], + "requirements_zh": req.requirements_zh.strip()[:2200] or state.requirements_zh, + "quantity": max(1, min(10, int(req.quantity or state.quantity or 6))), + }) + user_message = req.message.strip() + if not user_message: + user_message = state.requirements_zh or "按当前设置准备主体套图生成要求" + assistant_text, requirements_zh, prompt_en, quantity, selected_traits = _subject_agent_message_update(state, user_message) + messages = ( + state.messages + + [SubjectAgentMessage(role="user", content=user_message, created_at=time.time())] + + [SubjectAgentMessage(role="assistant", content=assistant_text, created_at=time.time())] + )[-30:] + state = state.model_copy(update={ + "requirements_zh": requirements_zh, + "generation_prompt_en": prompt_en, + "quantity": quantity, + "selected_traits": selected_traits, + "messages": messages, + "updated_at": time.time(), + }) + update(job, subject_agent=state, message="转换层生图要求已更新") + return job_with_artifacts(job) + + @app.delete("/jobs/{job_id}") def delete_job(job_id: str) -> dict[str, bool | str]: d = (JOBS_DIR / job_id).resolve() diff --git a/docs/source-analysis.html b/docs/source-analysis.html index 89f3181..bb172ca 100644 --- a/docs/source-analysis.html +++ b/docs/source-analysis.html @@ -594,7 +594,7 @@ web/app/globals.css全局主题变量、登录页视觉样式、信息流工作台同源品牌 token、ReactFlow 样式引用,以及本地开发态 nextjs-portal 遮挡隐藏规则。工作台在 skg-board-theme 内定义 --skg-gold-1--skg-gold-2--skg-cream--skg-bg-*--skg-text-*--skg-radius-* 和按钮阴影等变量,并新增 skg-board-brandskg-stat-cardskg-primary-actionskg-secondary-actionskg-empty-state 等样式。暗色工作台复用登录页金色聚焦、米白主按钮和弱暖光氛围;明亮模式通过 skg-board-theme--light 复用同一套结构,改成暖白底、白色 panel、黑底主 CTA 和深色文本,不另起一套界面。 web/app/page.tsx产品工作台主状态:jobs、activeJobId、生成任务状态;主渲染为全屏素材输入列 + 信息流广告复刻工作表;“开始分析”会把 job 放入并行素材分析队列,下载完成后触发 triggerTranscribe 解析音频,并触发 analyzeJob 自动抽 12 张参考帧,形成“音频文案路 + 视频视觉路”同步推进;音频失败时会忽略失败状态下残留的半成品 transcript,允许再次触发音频解析;底部吸附音频条和旧全局浮动主题按钮不再从主界面渲染,避免和工作台内的明暗模式切换重复。 web/components/ad-recreation-board.tsx信息流广告复刻工作表:顶部先展示与登录页连续的 SKG brand strip,包含 SKG 字标、“未来健康 · 营销内容工作台”和“营销内容工作台 · TK 二创”;右侧素材/任务/视频/文案统计改为米白 stat 卡片,主动作按钮统一走 skg-primary-action,次动作走 skg-secondary-action,空状态复用 AnimatedLoginCharactersbuildWorkflowSteps 仍统一生成 01-09 流程顺序、状态和判定依据,WorkflowStepBadge / PipelineLane / 分镜列标题也继续共用同一套编号;但完整 WorkflowOrderBar、右侧素材/视频/音频/文案/参考帧需求 chips、文案依据下拉和“音频文案、抽帧参考、主体重构、产品素材池”四个状态条不再默认渲染在工作区顶部。左侧素材输入只负责链接/上传和任务切换,不再重复放横版原视频预览;右侧源视频工作区直接进入核心操作。讲话人、节奏和背景音分析仍写入 AudioScript,但不再作为“音频解析结果”卡片默认渲染;主工作区左侧宽度调整为 430-460px,上方是按 9:16 显示的竖版原视频播放器,播放器内覆盖“当前点抽帧”,按当前播放秒数手动补参考帧,播放器下方是逐句时间轴,英文和中文都最多显示两行;右侧上方是无标题的波形与切点参考框,下方是三栏主体管线。音频波形用参考图式的连续灰色包络显示响度、停顿和密集爆点,顶部把低/中/高密度按钮和当前播放秒数、总时长、鼠标指针停点秒数直接放在波形上方。视频播放时通过 requestAnimationFrame 平滑驱动波形播放线,同时同步高亮并滚动当前句;点击音频波形或字幕行会跳转原视频时间。音频波形下方同框渲染无标题的 TimelineFilmstrip 临时画面胶片,前端按低/中/高密度从源视频 canvas 截取预览缩略图,并按 frame.time / duration 的百分比定位到和波形同一条时间轴上;波形与胶片之间不显示分隔横线,胶片轨道贴近波形,缩略图轻微上下错落并倾斜重叠排列,hover 时用同一张胶片卡在原位置生成固定顶层克隆,约 4.8 倍放大并自动限制在视口内,避免被工作区、滚动容器或相邻面板遮挡;单击胶片只跳转原视频时间,不写入任务数据,双击胶片或拖进参考帧池时才调用手动抽帧并正式加入 job.frames,已加入的胶片显示“已添加”;胶片预览按 job、视频、密度和时长缓存,未切换低/中/高时返回页面不重新扫视频。右侧参考帧池的主入口是“自动抽帧 12 张”,一键按动作峰值目标重新抽取 12 张源视频参考帧,优先抓手势、表情变化、节奏点和镜头变化;缩略图按竖版完整比例显示不裁切,点选状态直接叠在参考帧池缩略图上,鼠标停留会通过固定浮层放大展示完整帧。转换层不再暴露“生成 10 张高清图”、透明骨架/真人或完整/常用视图开关,改成真人重构、卡通重构、元素重构、自主描述四个投放区;每区最多 3 张参考帧,拖入只加入该区参考队列,用户放好参考和文字后点击按钮才调用 generateSubjectAssets 固定生成 6 视图,卡通重构可选择具体卡通风格,文字方向会进入 prompt;转换层顶部新增生图模型选择(自动 / GPT / Gemini),选择写入当前 job 作用域的 localStorage,只影响当前项目的主体套图生成;四个方向的提示词输入会记忆当前项目常用短语并生成可点击小按键,点击会追加到当前提示词。主体元素区按重构类型分组显示结果;只展示每个 view 的最新一张,缩略图上提供“重新生成这一张”和“删除这一张”,单张重生会用 replace_views=true 替换同一视角。前端当前对真人/元素/自主描述传 subject_style=source_actor,对卡通重构传 subject_style=cartoon_subject;真人、卡通、元素和有文字的自主描述使用 reconstruction_mode=similar,后端先识别关键帧 brief,再把参考帧作为 /images/edits 的 image refs 一起提交;自主描述空文本使用 reconstruction_mode=same 做源形象锁定。主体生成完成后会形成 subject_consensus_brief。音频结果下方是信息流复刻分镜工作台:顶部产品参考区是“同一产品素材池”,不限量上传产品图,不做不同产品身份判断;上传原图推荐长边 1200-2000px、短边至少 600px,但后端会统一生成最长边 1600px、JPEG 92 的 AI 工作副本,并回显尺寸、自动转换和风险标注;上传后按“套在脖子上的 U 形肩颈按摩仪”进行同一产品批量识别,左/右按佩戴者身体左右、上/下按佩戴方向,额外标注内外侧、开口方向、局部结构点、背景类型、用途标签、生成风险和备注,用户只检查备注,鼠标悬停通过固定浮层显示大图预览,能盖过滚动容器和分镜框架;缺视角补图失败时保留重试入口。脚本区在分镜行上方提供“作者想法”和“整片改写”,每行新口播文案可直接编辑并可单段 AI 改写。每条音频分镜默认是左侧三字段、右侧横向视频候选轨;高级区仍保留首尾帧 prompt、产品出现方式和旧 6 字段。ModelTrace 会在音频解析、产品识别/补图、主体重构视图包、脚本改写等入口旁直接展示模型名;生图入口会显示 gpt-image-2 / gemini-3-pro-image-preview 链路和短时熔断规则,点击后用固定浮层展示模型链路、输入输出和回退逻辑。旧分镜卡、抽帧控制和视频生成组件仍保留在文件里,但当前主路径不渲染。 - SourceSubjectPipeline源视频工作区右侧主体管线主路径:三栏分别是竖向 参考帧池转换层主体元素。参考帧池保留自动 12 张、胶片拖入正式成帧、点击勾选和删除;参考帧缩略图保持小尺寸固定宽度、aspect-[9/16]object-contain 显示,hover 预览通过 MediaAssetTile 的左侧紧凑浮层显示。转换层取消旧的“透明骨架 / 真人”和“完整 10 / 常用 4”开关,改成真人重构、卡通重构、元素重构、自主描述四个投放区;每个区最多保留 3 张参考帧,拖入只加入参考队列,不自动调用生成;用户放好参考和文字后点击按钮才调用 generateSubjectAssets 生成固定 6 视图。转换层顶部提供 autogpt-image-2gemini-3-pro-image-preview 三档模型选择,偏好存到当前 job 作用域的 localStorage["skg:subject-image-model:v1:{jobId}"];提示词输入存到当前 job 作用域的 localStorage["skg:subject-prompt-memory:v1:{jobId}"],会把旧词解析成短 chip,点击 chip 会追加到当前方向但不会跨项目带入。文字输入会参与 prompt,卡通重构额外提供 3D 动画、潮玩公仔、日系清爽、美式插画、黏土玩具、极简扁平等风格。真人、卡通、元素和有文字的自主描述强调参考创新:先把参考帧识别成主体 brief,再把参考帧作为 /images/edits 的 image refs 一起提交,默认继承参考里的性别、人种/肤色、年龄体态和角色气质这些广义特征,但生成的是同一个全新主体;自主描述没有文字时切到 reconstruction_mode=same,使用参考帧作为源形象锁定证据,避免被默认 prompt 随机换成另一个人。后端会为每次主体套图注入同一份 pack bible:参考创新模式锁定同一个全新主体和同一套服装,源形象锁定模式锁定参考帧里的可见主体、体态、发型、服装和配色;后处理会裁出白底主体并允许放大到画布高度上限约 96%,实测典型主体有效高度约 90%,避免模型生成“小人 + 大白边”。主体元素区按每次生成的 pack_id 组织成“套图文件夹”:顶部展开当前选中套图,下面是可滚动的套图包列表;同一方向可保留多套,生成中按 pack 显示 2/6 这类进度,单张完成就替换对应占位卡。缩略图复用 MediaAssetTile,支持 hover 放大、单张重生和删除。旧下方 SourceReferenceBuildPanel 不再主路径渲染。 + SourceSubjectPipeline源视频工作区右侧主体管线主路径:三栏分别是竖向 参考帧池转换层主体元素。参考帧池保留自动 12 张、胶片拖入正式成帧、点击勾选和删除;参考帧缩略图保持小尺寸固定宽度、aspect-[9/16]object-contain 显示,hover 预览通过 MediaAssetTile 的左侧紧凑浮层显示。转换层取消四个大投放区,改为项目内生图对话智能体:用户先把 1-3 张参考帧拖入同一个参考区,选择 GPT 套件Gemini 套件,点击“开始分析”后由后端 subject-agent/analyze 识别参考图关键特征、生成中文摘要、英文 generation brief、特征 chip、追问和风险提示。GPT 套件表示 GPT 分析/对话 + gpt-image-2 生图;Gemini 套件表示 Gemini 分析/对话 + gemini-3-pro-image-preview 生图,默认不跨套件偷偷兜底。转换层对话只处理主体生图需求,允许修改形象锁定、卡通重构、创意复刻、自主描述、数量、服装统一、人物占比、保留/删除元素等;对话和分析写入 Job.subject_agent,不再把模型选择作为主状态写入 localStorage。点击“生成 N 张”仍调用 generateSubjectAssets,按当前对话生成的英文 prompt、参考帧、模式和数量提交主体套图。形象锁定走 reconstruction_mode=same,其他参考创新走 similar 并把参考帧作为 /images/edits 的 image refs 一起提交。后端会为每次主体套图注入同一份 pack bible:参考创新模式锁定同一个全新主体和同一套服装,源形象锁定模式锁定参考帧里的可见主体、体态、发型、服装和配色;后处理会裁出白底主体并允许放大到画布高度上限约 96%,实测典型主体有效高度约 90%,避免模型生成“小人 + 大白边”。主体元素区按每次生成的 pack_id 组织成“套图文件夹”:顶部展开当前选中套图,下面是可滚动的套图包列表;同一方向可保留多套,生成中按 pack 显示 2/6 这类进度,单张完成就替换对应占位卡。缩略图复用 MediaAssetTile,支持 hover 放大、单张重生和删除。旧下方 SourceReferenceBuildPanel 不再主路径渲染。 AudioStoryboardPlanPanel 三字段候选生成当前分镜主路径:每行是左右双栏,左侧默认显示 skg_copy_*scene_one_line_*action_one_line_* 三组中英字段,右侧直接显示视频候选横向轨。用户改中文镜像后,字段失焦会通过 refineStoryboard 优化对应英文主值,失败时退回 translateText;英文仍是后续 prompt 主值。quickPlanStoryboard 把三字段和主体 brief 展开为完整 StoryboardScenegenerateStoryboardVideocount 可由单行数字控件选择,候选新生成后持续向右追加,不再用 4-grid 撑高每行。整片生成同样可选择每行数量,并以 concurrency=1 按行排队提交。产品素材池、批量控制、每行主体区和高级区都可折叠,高级抽屉仍展示旧 6 字段、首尾帧 prompt 和首尾帧资产槽,但客户默认不用先处理首尾帧。 web/components/resource-library/library-drawer.tsx全局资源中心浮窗:由工作台顶部“资源库”按钮打开,叠加在工作台上方但不阻塞主界面;尺寸、位置和当前 Tab 写入 localStorage["skg-resource-library-drawer"]。提示词 Tab 固定 5 列(场景描述、视频描述、主体描述、SKG 文案、产品角度),每列先显示 use_count 排名前 5 的“常用”,再按月份倒序分组;提示词节点常驻复制按钮,hover 可选英文/中文/双语复制,并调用 use 接口。素材 Tab 固定 4 列(主体、产品、场景、视频),节点不可拖动,按月份倒序硬编码排列;“应用到当前 job”只调用后端复制接口,得到普通 ImageRef(kind="asset") 后再写入产品素材池或复制 ID。浮窗顶部最近 24 小时横条混合显示提示词和素材;新建提示词、上传素材、删除前查引用、详情侧栏都在该组件内完成。 AdRecreationBoard 主题切换顶部指标区左侧有“明亮/暗色”按钮,使用 Sun / Moon 图标切换 skg-board-theme--light 类名,并把选择写入 localStorage["skg-board-theme"]。暗色仍是默认模式;明亮模式只改变工作台外观,不改变任务、素材、分镜、模型调用或接口数据。 @@ -706,6 +706,7 @@ api/main.py frames: KeyFrame[], transcript: TranscriptSegment[], audio_script: AudioScript, + subject_agent: SubjectAgentState, storyboard_images?: StoryboardImage[], product_refs?: ProductRefStateItem[] } @@ -723,6 +724,22 @@ api/main.py elements: KeyElement[], storyboard: StoryboardScene, generated_images: GeneratedImage[] +} + +
+

SubjectAgentState

+

转换层生图对话的项目内记忆。它跟随 Job 写入 state.json,用于恢复参考帧、模型套件、分析结果、对话、数量和最终英文生图 prompt。

+
SubjectAgentState {
+  model_bundle: "gpt" | "gemini",
+  source_frame_indices: number[],
+  analysis?: SubjectAgentAnalysis,
+  messages: SubjectAgentMessage[],
+  selected_mode: "realistic" | "cartoon" | "elements" | "custom",
+  selected_traits: string[],
+  requirements_zh,
+  generation_prompt_en,
+  quantity,
+  updated_at
 }
@@ -986,6 +1003,7 @@ ProductRefStateItem { 应用清洗POST /cleanup/applyapplyCleanedFrame物理覆盖 frames/{idx}.jpg,并备份原图。 元素增改删POST/PATCH/DELETE /elementsaddElement/updateElement/deleteElement让用户修正 Vision 错误,避免候选结果锁死。 元素提取POST /elements/{element_id}/cutoutcutoutElement调用图像模型生成独立白底素材图,每次累积一张 cutout。 + 转换层生图智能体POST /jobs/{id}/subject-agent/analyze
POST /jobs/{id}/subject-agent/messageanalyzeSubjectAgent
sendSubjectAgentMessage项目内主体生图对话状态入口。analyze 接收当前转换层参考帧和 model_bundle=gpt/gemini,调用对应套件的视觉模型输出中文摘要、英文 generation brief、特征 chip、追问和风险提示,并写入 Job.subject_agent.analysismessage 只处理主体生图需求,把用户对数量、风格、服装统一、形象锁定/创意复刻、保留/删除元素的自然语言改成 requirements_zh 和英文 generation_prompt_en。GPT 套件后续生图用 gpt-image-2,Gemini 套件后续生图用 gemini-3-pro-image-preview。 主体资产包POST /elements/{element_id}/subject-assets
DELETE /elements/{element_id}/subject-assets/{asset_id}generateSubjectAssets
deleteSubjectAsset根据转换层里的参考帧重新绘制一个统一主体资产包;前端按真人重构、卡通重构、元素重构、自主描述四个方向分别管理 source_frame_indices,每个方向最多 3 张参考帧,固定请求 frontthree_quarter_leftleftbackrightthree_quarter_right 六个视图,不再暴露完整 10 / 常用 4 选择。当前源视频工作区使用 subject_style=source_actor 承接真人、元素和自主描述,使用 subject_style=cartoon_subject 承接卡通重构;旧 transparent_human 仍为兼容类型但不是当前转换层默认入口。reconstruction_mode=similar 是参考创新路径:后端先用 VISION_MODEL 把关键帧反推成主体 brief;只要有参考帧,就把这些帧作为 /images/edits 的 image refs 一起提交,日志会显示 endpoint=/images/editsimage_refs>0,不再偷偷降级为纯文字生图。卡通重构在后端额外加入原创卡通/插画主体约束,明确不输出真实人物复制 likeness。生成完成后,后端会把生成视图反推/写入 KeyElement.subject_consensus_brief,作为后续首尾帧的唯一主体身份文字依据。reconstruction_mode=same 是源形象锁定路径:自主描述空文本时前端使用该路径,后端把参考帧作为 primary visual evidence,尽量保留同一可见主体、体态、发型、服装和配色。每个 view 单独调用一次生图,明确禁止多视图拼图、contact sheet、多主体、多面板、标签或对比排版。单次图片请求受 IMAGE_REQUEST_TIMEOUT_SECONDS 控制,默认 60 秒;gpt-image-2 超时、429、5xx、DNS 或连接失败时可兜底 gemini-3-pro-image-preview,连续 2 次主模型上游类失败后 600 秒内短时熔断。仅当 image_model_preference=auto 时才启用兜底和熔断;用户显式选择 GPT 或 Gemini 时只走所选模型,方便已知某个上游不可用时直接切换。主体同一套图内一旦触发 Gemini,后续视图沿用 Gemini,避免风格混杂和重复等待主模型超时。主体 prompt 会要求从参考图继承性别、人种/肤色、年龄体态和角色气质等广义特征,但生成同一个全新主体;六视图必须保持同一脸部设定、发型、体态、服装类型、配色、材质、剪裁和配饰,不允许每个视角换衣服。后端新增 pack bible 固定字段,把主体、发型、肤色、体态、服装、鞋、配饰和禁止换装项注入每个视角;_normalize_asset_image(fill_subject=true) 裁白边后会按目标画布放大主体,而不是只用 thumbnail() 缩小,目标是让全身主体占画布高度约 88-94%。后端不再要求整包全成功才写入:单个视图失败时会保留已成功生成的主体图,返回“部分生成完成”,只有一张都没生成出来才返回错误。replace_views=true 时会替换同一视角旧图;删除接口会移除对应 subject asset 记录并删除本地 jpg 文件。 主体套图状态SubjectAsset.status
pack_idweb/app/page.tsx
SourceSubjectPipelinegenerateSubjectAssets 现在先写入同一个 pack_id 下的 queued 占位卡并立即返回,后台按视角逐张生成,单张完成就把该占位替换成 completed 图片。前端轮询会把 queued / in_progress 主体资产纳入运行状态;主体元素区按 pack 显示套图文件夹,点击某个文件夹后展开该套图,其他套图顺位进入下方可滚动列表。 首尾帧资产POST /frames/{idx}/scene-assetgenerateSceneAsset同一接口兼容旧场景图和新首尾帧;当前信息流复刻流程传 asset_role=first_frame/last_framesubject_brief 和最多 1-2 张 product_images。首尾帧不再传主体图、不再把主体图和产品图拼成 contact sheet;主体只走文字 brief,允许新动作、新景别、新表情和新环境。若本条需要产品,后端只把产品参考图作为 gpt-image-2 image-edit 的硬视觉真源;若不需要产品,则走纯文字生图。关键帧只作为行数据承载位置。生成结果保存在 scene_assets,前端再写入 StoryboardScene.first_image/last_image。 @@ -1113,6 +1131,19 @@ ProductRefStateItem {

变更记录

这个记录不是 git log 的替代品。它记录“产品理解发生了什么变化、影响了哪些源码、你以后描述需求时该怎么说”。后续每次改功能都要补一条。

+
+
+

2026-05-20 · 转换层改为项目内生图对话智能体

+ UI + API + Workflow +
+
+

问题:四个投放区加 textarea 的转换层不适合“参考图先分析、再边聊边确定生图需求”的模糊决策流程;同时 GPT/Gemini 切换必须成套影响分析对话模型和生图模型,不能只切图片模型。

+

改动:Job 新增 subject_agent 状态,保存当前项目的模型套件、参考帧、AI 分析、对话消息、选中方向、特征 chip、数量和英文生成 prompt。后端新增 POST /jobs/{job_id}/subject-agent/analyzePOST /jobs/{job_id}/subject-agent/messageSourceSubjectPipeline 的转换层改成“GPT 套件 / Gemini 套件 + 参考图 + 开始分析 + 生图对话 + 数量 + 生成”结构。选 GPT 时分析/对话走 GPT、图片走 gpt-image-2;选 Gemini 时分析/对话走 Gemini、图片走 gemini-3-pro-image-preview

+

影响:转换层不再把生图模型偏好写入浏览器全局或 job-scoped localStorage 作为主状态;当前项目的生图记忆跟随 state.json。用户可以在对话里改“生成几张、保留什么、删除什么、人物放大、服装统一、形象锁定/创意复刻/卡通/自主描述”,确认后仍复用 generateSubjectAssets 逐张生成并进入右侧主体元素套图文件夹。

+
+

2026-05-20 · 转换层参考帧改为项目隔离和图像参考生图

diff --git a/web/components/ad-recreation-board.tsx b/web/components/ad-recreation-board.tsx index d8d24bb..ed730b3 100644 --- a/web/components/ad-recreation-board.tsx +++ b/web/components/ad-recreation-board.tsx @@ -4,7 +4,7 @@ import { type MouseEvent as ReactMouseEvent, type ReactNode, type RefObject, use import { createPortal } from "react-dom" import { AlertTriangle, BookOpen, Check, ChevronDown, Circle, Film, FileText, Image as ImageIcon, Info, Link2, Loader2, - Mic, Moon, Package, PanelRight, Play, Plus, RefreshCw, Scissors, Sparkles, Sun, Trash2, Upload, Wand2, + MessageSquare, Mic, Moon, Package, PanelRight, Play, Plus, RefreshCw, Scissors, Send, Sparkles, Sun, Trash2, Upload, Wand2, } from "lucide-react" import { toast } from "sonner" import { @@ -29,10 +29,12 @@ import { type StoryboardScene, type SubjectAsset, type SubjectImageModelPreference, + type SubjectModelBundle, type SubjectProfilePreference, type SubjectKind, addElement, analyzeJob, + analyzeSubjectAgent, analyzeProductViews, apiAssetUrl, characterLibraryImageUrl, @@ -59,6 +61,7 @@ import { rewriteStoryboardScript, saveSubjectTemplate, saveProductRefs, + sendSubjectAgentMessage, sourceAudioUrl, subjectTemplateImageUrl, updateElement, @@ -271,12 +274,6 @@ const SUBJECT_VIEW_ORDER = [ const COMMON_SUBJECT_VIEW_VALUES = ["front", "three_quarter_left", "three_quarter_right", "bust_front"] const RECONSTRUCTION_SUBJECT_VIEW_VALUES = ["front", "three_quarter_left", "left", "back", "right", "three_quarter_right"] const RECONSTRUCTION_FRAME_LIMIT = 3 -const EMPTY_RECONSTRUCTION_FRAME_MAP: Record = { - realistic: [], - cartoon: [], - elements: [], - custom: [], -} const DEFAULT_RECONSTRUCTION_DIRECTIONS: Record = { realistic: "", cartoon: "", @@ -294,9 +291,9 @@ const CARTOON_RECONSTRUCTION_STYLES: Array<{ value: CartoonReconstructionStyle; const RECONSTRUCTION_MODES: Array<{ value: SubjectReconstructionMode; label: string; subtitle: string; placeholder: string }> = [ { value: "realistic", - label: "真人重构", - subtitle: "参考非身份化人物特点,生成全新真人 6 视图", - placeholder: "如:更年轻、亚洲女性、运动感、不要像原人", + label: "形象锁定", + subtitle: "参考可见主体和服装,生成同一形象的多视图", + placeholder: "如:保持透明骨骼男孩、蓝色头带和短裤,人物更大", }, { value: "cartoon", @@ -306,8 +303,8 @@ const RECONSTRUCTION_MODES: Array<{ value: SubjectReconstructionMode; label: str }, { value: "elements", - label: "元素重构", - subtitle: "参考姿态、色块和镜头语言,生成差异化主体", + label: "创意复刻", + subtitle: "参考姿态、色块和镜头语言,生成差异化新主体", placeholder: "如:保留运动气质,去掉原服装和原脸", }, { @@ -318,12 +315,10 @@ const RECONSTRUCTION_MODES: Array<{ value: SubjectReconstructionMode; label: str }, ] -const SUBJECT_IMAGE_MODEL_OPTIONS: Array<{ value: SubjectImageModelPreference; label: string; detail: string }> = [ - { value: "auto", label: "自动", detail: "GPT 失败才兜底" }, - { value: "gpt-image-2", label: "GPT", detail: "只用 gpt-image-2" }, - { value: "gemini-3-pro-image-preview", label: "Gemini", detail: "直接用 Gemini" }, +const SUBJECT_MODEL_BUNDLE_OPTIONS: Array<{ value: SubjectModelBundle; label: string; detail: string }> = [ + { value: "gpt", label: "GPT 套件", detail: "GPT 对话 + gpt-image-2 生图" }, + { value: "gemini", label: "Gemini 套件", detail: "Gemini 对话 + Gemini 生图" }, ] -const SUBJECT_MODEL_MEMORY_KEY = "skg:subject-image-model:v1" const SUBJECT_PROMPT_MEMORY_KEY = "skg:subject-prompt-memory:v1" const SUBJECT_PROMPT_MEMORY_LIMIT = 28 @@ -680,21 +675,6 @@ function saveSubjectPromptMemory(jobId: string, memory: Record item.value === raw) ? raw as SubjectImageModelPreference : "auto" -} - -function saveSubjectImageModelPreference(jobId: string, value: SubjectImageModelPreference) { - if (typeof window === "undefined") return - try { - window.localStorage.setItem(subjectScopedStorageKey(SUBJECT_MODEL_MEMORY_KEY, jobId), value) - } catch { - /* localStorage may be unavailable */ - } -} - function subjectPromptChipsFromText(text: string): string[] { const normalized = text.replace(/[,。;;、\n]/g, ",").replace(/\s+/g, " ").trim() const rawParts = normalized.split(",").map((item) => item.trim()).filter(Boolean) @@ -717,13 +697,6 @@ function mergeSubjectPromptMemory(current: string[], text: string) { return [...chips, ...current.filter((item) => !chips.includes(item))].slice(0, SUBJECT_PROMPT_MEMORY_LIMIT) } -function appendSubjectPromptChip(text: string, chip: string) { - const trimmed = text.trim() - if (!trimmed) return chip - if (trimmed.includes(chip)) return trimmed - return `${trimmed},${chip}` -} - function formatSeconds(raw?: number) { if (!raw || Number.isNaN(raw)) return "0.0s" return `${raw.toFixed(1)}s` @@ -1100,6 +1073,32 @@ function reconstructionModeConfig(mode: SubjectReconstructionMode) { return RECONSTRUCTION_MODES.find((item) => item.value === mode) ?? RECONSTRUCTION_MODES[0] } +function subjectModelBundleConfig(bundle: SubjectModelBundle) { + return SUBJECT_MODEL_BUNDLE_OPTIONS.find((item) => item.value === bundle) ?? SUBJECT_MODEL_BUNDLE_OPTIONS[0] +} + +function subjectImageModelFromBundle(bundle: SubjectModelBundle): SubjectImageModelPreference { + return bundle === "gemini" ? "gemini-3-pro-image-preview" : "gpt-image-2" +} + +function subjectViewsForQuantity(quantity: number) { + const count = Math.max(1, Math.min(10, Math.round(quantity || 6))) + const views = [ + "front", + "three_quarter_left", + "left", + "back", + "right", + "three_quarter_right", + "bust_front", + "bust_left_45", + "bust_right_45", + "back_neck_detail", + ] + if (count <= 4) return ["front", "three_quarter_left", "back", "three_quarter_right"].slice(0, count) + return views.slice(0, count) +} + function cartoonStyleConfig(style: CartoonReconstructionStyle) { return CARTOON_RECONSTRUCTION_STYLES.find((item) => item.value === style) ?? CARTOON_RECONSTRUCTION_STYLES[0] } @@ -1129,13 +1128,14 @@ function buildReconstructionDirection( mode: SubjectReconstructionMode, direction: string, cartoonStyle: CartoonReconstructionStyle, + viewCount = RECONSTRUCTION_SUBJECT_VIEW_VALUES.length, ) { const trimmed = direction.trim() const style = cartoonStyleConfig(cartoonStyle) const common = [ "Legal-safe reference reconstruction: use selected reference frames only as non-identifying creative evidence.", "Do not copy the original person, face, biometric identity, unique likeness, watermark, platform UI, captions, exact outfit, exact background, exact composition, or source pixels.", - `Generate exactly ${RECONSTRUCTION_SUBJECT_VIEW_VALUES.length} separate views of one newly designed subject.`, + `Generate exactly ${viewCount} separate views of one newly designed subject.`, "Keep the neck, collarbone, shoulders, upper back, and side neck clean and usable for SKG neck-and-shoulder product placement.", ] if (mode === "realistic") { @@ -3306,10 +3306,15 @@ function SourceSubjectPipeline({ onDropFilmstripFrame?: (time: number) => void }) { const [referenceDropActive, setReferenceDropActive] = useState(false) - const [activeDropMode, setActiveDropMode] = useState(null) - const [conversionFrameIndicesByMode, setConversionFrameIndicesByMode] = useState>(() => ({ ...EMPTY_RECONSTRUCTION_FRAME_MAP })) + const [agentDropActive, setAgentDropActive] = useState(false) const [reconstructionDirections, setReconstructionDirections] = useState>(() => ({ ...DEFAULT_RECONSTRUCTION_DIRECTIONS })) - const [subjectImageModelPreference, setSubjectImageModelPreference] = useState(() => loadSubjectImageModelPreference(job.id)) + const [subjectModelBundle, setSubjectModelBundle] = useState(() => job.subject_agent?.model_bundle ?? "gpt") + const [agentReferenceFrameIndices, setAgentReferenceFrameIndices] = useState(() => job.subject_agent?.source_frame_indices ?? []) + const [agentMode, setAgentMode] = useState(() => job.subject_agent?.selected_mode ?? "custom") + const [agentQuantity, setAgentQuantity] = useState(() => job.subject_agent?.quantity ?? 6) + const [agentRequirement, setAgentRequirement] = useState(() => job.subject_agent?.requirements_zh ?? "") + const [agentInput, setAgentInput] = useState("") + const [subjectAgentBusy, setSubjectAgentBusy] = useState<"analyze" | "message" | null>(null) const [promptMemoryByMode, setPromptMemoryByMode] = useState>(() => loadSubjectPromptMemory(job.id)) const [cartoonStyle, setCartoonStyle] = useState("3d_animation") const [cartoonStyleOpen, setCartoonStyleOpen] = useState(false) @@ -3318,19 +3323,16 @@ function SourceSubjectPipeline({ const [expandedSubjectPackKey, setExpandedSubjectPackKey] = useState(null) const [lastSubjectProfile, setLastSubjectProfile] = useState(null) const subjectBusy = !!subjectBusyFor - const selectedSubjectViews = RECONSTRUCTION_SUBJECT_VIEW_VALUES - const conversionFramesByMode = useMemo(() => { - const next = {} as Record - for (const config of RECONSTRUCTION_MODES) { - next[config.value] = conversionFrameIndicesByMode[config.value] - .map((index) => frames.find((frame) => frame.index === index)) - .filter((frame): frame is KeyFrame => !!frame) - } - return next - }, [conversionFrameIndicesByMode, frames]) + const selectedSubjectViews = useMemo(() => subjectViewsForQuantity(agentQuantity), [agentQuantity]) const allConversionFrameIndices = useMemo( - () => new Set(Object.values(conversionFrameIndicesByMode).flat()), - [conversionFrameIndicesByMode], + () => new Set(agentReferenceFrameIndices), + [agentReferenceFrameIndices], + ) + const agentReferenceFrames = useMemo( + () => agentReferenceFrameIndices + .map((index) => frames.find((frame) => frame.index === index)) + .filter((frame): frame is KeyFrame => !!frame), + [agentReferenceFrameIndices, frames], ) const actorSources = useMemo(() => { const items: Array<{ frame: KeyFrame; element: KeyElement; mode: SubjectReconstructionMode }> = [] @@ -3403,21 +3405,30 @@ function SourceSubjectPipeline({ }, [subjectAssetPacks]) useEffect(() => { - setConversionFrameIndicesByMode({ ...EMPTY_RECONSTRUCTION_FRAME_MAP }) setReconstructionDirections({ ...DEFAULT_RECONSTRUCTION_DIRECTIONS }) - setSubjectImageModelPreference(loadSubjectImageModelPreference(job.id)) + setSubjectModelBundle(job.subject_agent?.model_bundle ?? "gpt") + setAgentReferenceFrameIndices(job.subject_agent?.source_frame_indices ?? []) + setAgentMode(job.subject_agent?.selected_mode ?? "custom") + setAgentQuantity(job.subject_agent?.quantity ?? 6) + setAgentRequirement(job.subject_agent?.requirements_zh ?? "") + setAgentInput("") + setSubjectAgentBusy(null) setPromptMemoryByMode(loadSubjectPromptMemory(job.id)) setLastSubjectProfile(null) setSubjectBusyFor(null) setSubjectAssetBusy(null) - setActiveDropMode(null) setCartoonStyleOpen(false) setExpandedSubjectPackKey(null) }, [job.id]) useEffect(() => { - saveSubjectImageModelPreference(job.id, subjectImageModelPreference) - }, [job.id, subjectImageModelPreference]) + const agent = job.subject_agent + setSubjectModelBundle(agent?.model_bundle ?? "gpt") + setAgentReferenceFrameIndices(agent?.source_frame_indices ?? []) + setAgentMode(agent?.selected_mode ?? "custom") + setAgentQuantity(agent?.quantity ?? 6) + setAgentRequirement(agent?.requirements_zh ?? "") + }, [job.id, job.subject_agent?.updated_at]) useEffect(() => { saveSubjectPromptMemory(job.id, promptMemoryByMode) @@ -3430,13 +3441,7 @@ function SourceSubjectPipeline({ }, [expandedSubjectPackKey, subjectAssetPacks]) useEffect(() => { - setConversionFrameIndicesByMode((current) => { - const next = {} as Record - for (const config of RECONSTRUCTION_MODES) { - next[config.value] = current[config.value].filter((index) => frames.some((frame) => frame.index === index)) - } - return next - }) + setAgentReferenceFrameIndices((current) => current.filter((index) => frames.some((frame) => frame.index === index))) }, [frames]) const buildSubjectProfileForRequest = () => { @@ -3452,22 +3457,9 @@ function SourceSubjectPipeline({ })) } - const applyPromptChip = (mode: SubjectReconstructionMode, chip: string) => { - setReconstructionDirections((current) => ({ - ...current, - [mode]: appendSubjectPromptChip(current[mode], chip), - })) - setPromptMemoryByMode((current) => ({ - ...current, - [mode]: [chip, ...(current[mode] || []).filter((item) => item !== chip)].slice(0, SUBJECT_PROMPT_MEMORY_LIMIT), - })) - } + const subjectModelLabel = (value: SubjectModelBundle) => subjectModelBundleConfig(value).label - const subjectModelLabel = (value: SubjectImageModelPreference) => { - return SUBJECT_IMAGE_MODEL_OPTIONS.find((item) => item.value === value)?.label ?? "自动" - } - - const generateSubjectPack = async (mode: SubjectReconstructionMode, sourceIndices = conversionFrameIndicesByMode[mode]) => { + const generateSubjectPack = async (mode: SubjectReconstructionMode, sourceIndices = agentReferenceFrameIndices) => { if (subjectBusyFor) { toast.warning("主体套图正在生成中,完成后再重生。") return @@ -3479,8 +3471,9 @@ function SourceSubjectPipeline({ const sourceFrames = sourceIndices .map((index) => frames.find((frame) => frame.index === index)) .filter((frame): frame is KeyFrame => !!frame) - const rawDirection = reconstructionDirections[mode].trim() - const sourceLockedReplication = mode === "custom" && !rawDirection + const agentPrompt = (job.subject_agent?.generation_prompt_en || agentRequirement || "").trim() + const rawDirection = (agentPrompt || reconstructionDirections[mode]).trim() + const sourceLockedReplication = mode === "realistic" || (mode === "custom" && !rawDirection) if (!sourceFrames.length && mode !== "custom") { toast.warning(`先把参考帧拖到${reconstructionModeConfig(mode).label}。`) return @@ -3499,8 +3492,8 @@ function SourceSubjectPipeline({ ? null : buildSubjectProfileForRequest() const subjectStyle = reconstructionSubjectStyle(mode) - const userDirection = buildReconstructionDirection(mode, reconstructionDirections[mode], cartoonStyle) - rememberPromptForMode(mode, reconstructionDirections[mode]) + const userDirection = buildReconstructionDirection(mode, rawDirection, cartoonStyle, selectedSubjectViews.length) + rememberPromptForMode(mode, rawDirection) const modeName = reconstructionElementName(mode) setSubjectBusyFor({ jobId: requestJobId, @@ -3509,7 +3502,7 @@ function SourceSubjectPipeline({ viewCount: selectedSubjectViews.length, sourceCount: sourceFrames.length, profileLabel: requestProfile?.summary ?? "按自主描述", - modelLabel: subjectModelLabel(subjectImageModelPreference), + modelLabel: subjectModelLabel(subjectModelBundle), }) try { let workingJob = job @@ -3539,9 +3532,9 @@ function SourceSubjectPipeline({ views: selectedSubjectViews, subject_profile: requestProfile?.payload ?? null, prompt: sourceLockedReplication - ? buildSourceLockedSubjectPrompt(subjectStyle) + ? `${buildSourceLockedSubjectPrompt(subjectStyle)} ${userDirection}` : buildSimilarSubjectPrompt(subjectStyle, userDirection, null, requestProfile), - image_model_preference: subjectImageModelPreference, + image_model_preference: subjectImageModelFromBundle(subjectModelBundle), replace_views: false, pack_label: `${reconstructionModeConfig(mode).label} ${new Date().toLocaleTimeString("zh-CN", { hour: "2-digit", minute: "2-digit", hour12: false })}`, pack_mode: mode, @@ -3564,39 +3557,14 @@ function SourceSubjectPipeline({ } } - const addConversionFrame = (mode: SubjectReconstructionMode, frame: KeyFrame) => { - const current = conversionFrameIndicesByMode[mode] - const existed = current.includes(frame.index) - const next = existed - ? current - : current.length >= RECONSTRUCTION_FRAME_LIMIT - ? [...current.slice(1), frame.index] - : [...current, frame.index] - setConversionFrameIndicesByMode((state) => ({ ...state, [mode]: next })) - if (existed) { - toast.info(`这张参考帧已经在${reconstructionModeConfig(mode).label}里。`) - return - } - if (current.length >= RECONSTRUCTION_FRAME_LIMIT) { - toast.warning(`${reconstructionModeConfig(mode).label}最多保留 ${RECONSTRUCTION_FRAME_LIMIT} 张参考帧,已替换为最近拖入的组合。`) - } - toast.info(`已加入${reconstructionModeConfig(mode).label}:${frame.timestamp.toFixed(1)}s。放好参考后点击生成 6 视图。`) - } - - const removeConversionFrame = (mode: SubjectReconstructionMode, frameIndex: number) => { - setConversionFrameIndicesByMode((state) => ({ - ...state, - [mode]: state[mode].filter((index) => index !== frameIndex), - })) - } - const regenerateSubjectAsset = async (item: { frame: KeyFrame; element: KeyElement; mode: SubjectReconstructionMode; asset: SubjectAsset }) => { const { frame, element, mode, asset } = item const sourceIndices = asset.source_frame_indices?.length ? asset.source_frame_indices - : conversionFrameIndicesByMode[mode] - const rawDirection = reconstructionDirections[mode].trim() - const sourceLockedReplication = mode === "custom" && !rawDirection + : agentReferenceFrameIndices + const agentPrompt = (job.subject_agent?.generation_prompt_en || agentRequirement || "").trim() + const rawDirection = (agentPrompt || reconstructionDirections[mode]).trim() + const sourceLockedReplication = mode === "realistic" || (mode === "custom" && !rawDirection) if (!sourceIndices.length && mode !== "custom") { toast.warning("转换层没有参考帧,不能重生。") return @@ -3611,7 +3579,7 @@ function SourceSubjectPipeline({ ? null : lastSubjectProfile ?? buildSubjectProfileForRequest() const subjectStyle = reconstructionSubjectStyle(mode) - rememberPromptForMode(mode, reconstructionDirections[mode]) + rememberPromptForMode(mode, rawDirection) const updated = await generateSubjectAssets(job.id, frame.index, element.id, { subject_kind: "living", subject_style: subjectStyle, @@ -3622,14 +3590,14 @@ function SourceSubjectPipeline({ views: [asset.view], subject_profile: requestProfile?.payload ?? null, prompt: sourceLockedReplication - ? buildSourceLockedSubjectPrompt(subjectStyle) + ? `${buildSourceLockedSubjectPrompt(subjectStyle)} ${buildReconstructionDirection(mode, rawDirection, cartoonStyle, 1)}` : buildSimilarSubjectPrompt( subjectStyle, - buildReconstructionDirection(mode, reconstructionDirections[mode], cartoonStyle), + buildReconstructionDirection(mode, rawDirection, cartoonStyle, 1), null, requestProfile, ), - image_model_preference: subjectImageModelPreference, + image_model_preference: subjectImageModelFromBundle(subjectModelBundle), replace_views: true, pack_id: asset.pack_id ?? "", pack_label: asset.pack_label ?? "", @@ -3659,6 +3627,98 @@ function SourceSubjectPipeline({ } } + const addAgentReferenceFrame = (frame: KeyFrame) => { + setAgentReferenceFrameIndices((current) => { + if (current.includes(frame.index)) { + toast.info("这张参考帧已经在转换层里。") + return current + } + const next = current.length >= RECONSTRUCTION_FRAME_LIMIT ? [...current.slice(1), frame.index] : [...current, frame.index] + if (current.length >= RECONSTRUCTION_FRAME_LIMIT) { + toast.warning(`最多保留 ${RECONSTRUCTION_FRAME_LIMIT} 张参考帧,已替换为最近拖入的组合。`) + } else { + toast.info(`已加入转换层:${frame.timestamp.toFixed(1)}s。`) + } + return next + }) + } + + const removeAgentReferenceFrame = (frameIndex: number) => { + setAgentReferenceFrameIndices((current) => current.filter((index) => index !== frameIndex)) + } + + const runSubjectAgentAnalyze = async () => { + if (!agentReferenceFrameIndices.length) { + toast.warning("先从左侧拖入 1-3 张参考帧,再开始分析。") + return + } + setSubjectAgentBusy("analyze") + try { + const updated = await analyzeSubjectAgent(job.id, { + model_bundle: subjectModelBundle, + source_frame_indices: agentReferenceFrameIndices, + }) + onJobUpdate(updated) + toast.success("转换层分析完成") + } catch (e) { + toast.error("转换层分析失败:" + (e instanceof Error ? e.message : String(e))) + } finally { + setSubjectAgentBusy(null) + } + } + + const sendSubjectAgentRequirement = async (message = agentInput) => { + const text = message.trim() + if (!text && !agentRequirement.trim()) { + toast.warning("先写一句要怎么生成,或者点快捷选项。") + return + } + setSubjectAgentBusy("message") + try { + const updated = await sendSubjectAgentMessage(job.id, { + model_bundle: subjectModelBundle, + source_frame_indices: agentReferenceFrameIndices, + selected_mode: agentMode, + selected_traits: job.subject_agent?.selected_traits ?? [], + requirements_zh: agentRequirement, + message: text, + quantity: agentQuantity, + }) + onJobUpdate(updated) + setAgentInput("") + } catch (e) { + toast.error("生图要求更新失败:" + (e instanceof Error ? e.message : String(e))) + } finally { + setSubjectAgentBusy(null) + } + } + + const toggleSubjectAgentTrait = (trait: string) => { + const selected = job.subject_agent?.selected_traits ?? [] + const next = selected.includes(trait) ? selected.filter((item) => item !== trait) : [...selected, trait].slice(0, 24) + void sendSubjectAgentMessage(job.id, { + model_bundle: subjectModelBundle, + source_frame_indices: agentReferenceFrameIndices, + selected_mode: agentMode, + selected_traits: next, + requirements_zh: agentRequirement, + message: next.includes(trait) ? `保留或强调:${trait}` : `不再强制:${trait}`, + quantity: agentQuantity, + }).then(onJobUpdate).catch((e) => { + toast.error("特征选择失败:" + (e instanceof Error ? e.message : String(e))) + }) + } + + const subjectAgent = job.subject_agent + const agentAnalysis = subjectAgent?.analysis ?? null + const agentMessages = subjectAgent?.messages ?? [] + const agentTraits = agentAnalysis?.trait_chips ?? [] + const selectedAgentTraits = subjectAgent?.selected_traits ?? [] + const canGenerateAgentPack = agentMode === "custom" + ? Boolean(agentRequirement.trim() || agentReferenceFrames.length) + : agentReferenceFrames.length > 0 + const agentModeRunning = runningActorModes.has(agentMode) + return (
@@ -3759,188 +3819,258 @@ function SourceSubjectPipeline({
} title="转换层" /> - + + {subjectModelBundleConfig(subjectModelBundle).detail} +
-
-
+
{ + if (!Array.from(event.dataTransfer.types).includes(SOURCE_KEYFRAME_DRAG_TYPE)) return + event.preventDefault() + setAgentDropActive(true) + }} + onDragOver={(event) => { + if (!Array.from(event.dataTransfer.types).includes(SOURCE_KEYFRAME_DRAG_TYPE)) return + event.preventDefault() + event.dataTransfer.dropEffect = "copy" + }} + onDragLeave={(event) => { + const next = event.relatedTarget as Node | null + if (next && event.currentTarget.contains(next)) return + setAgentDropActive(false) + }} + onDrop={(event) => { + event.preventDefault() + setAgentDropActive(false) + const frameIndex = Number(event.dataTransfer.getData(SOURCE_KEYFRAME_DRAG_TYPE)) + const frame = frames.find((item) => item.index === frameIndex) + if (frame) addAgentReferenceFrame(frame) + }} + > +
+ {SUBJECT_MODEL_BUNDLE_OPTIONS.map((option) => ( + + ))} +
+ +
- 生图模型 - 只影响转换层主体套图 + 参考图 + + {agentReferenceFrames.length}/{RECONSTRUCTION_FRAME_LIMIT} +
-
- {SUBJECT_IMAGE_MODEL_OPTIONS.map((option) => ( +
+ {agentReferenceFrames.map((frame, index) => ( +
+ {String(index + 1).padStart(2, "0")}} + /> + +
+ ))} + {!agentReferenceFrames.length ? ( +
+ 把左侧参考帧拖进来,再开始分析。 +
+ ) : null} +
+ +
+ +
+ {RECONSTRUCTION_MODES.map((modeConfig) => ( + + ))} +
+ + {agentMode === "cartoon" ? ( +
+ + {cartoonStyleOpen ? ( +
+ {CARTOON_RECONSTRUCTION_STYLES.map((style) => ( + + ))} +
+ ) : null} +
+ ) : null} + +
+ 数量 +
+ {[4, 6, 8, 10].map((count) => ( ))}
-
- 先拖入 1-3 张参考帧到对应方向,放好后再点击生成;系统只做参考重构,不复制原人、原脸或原画面。 -
-
- {RECONSTRUCTION_MODES.map((modeConfig) => { - const mode = modeConfig.value - const modeFrames = conversionFramesByMode[mode] - const promptChips = [...subjectPromptChipsFromText(reconstructionDirections[mode]), ...(promptMemoryByMode[mode] || [])] - .filter((chip, index, list) => chip && list.indexOf(chip) === index) - .slice(0, 10) - const dropActive = activeDropMode === mode - const canGenerate = mode === "custom" - ? Boolean(reconstructionDirections.custom.trim() || modeFrames.length) - : modeFrames.length > 0 - const modeRunning = runningActorModes.has(mode) - return ( -
{ - if (!Array.from(event.dataTransfer.types).includes(SOURCE_KEYFRAME_DRAG_TYPE)) return - event.preventDefault() - setActiveDropMode(mode) - }} - onDragOver={(event) => { - if (!Array.from(event.dataTransfer.types).includes(SOURCE_KEYFRAME_DRAG_TYPE)) return - event.preventDefault() - event.dataTransfer.dropEffect = "copy" - }} - onDragLeave={(event) => { - const next = event.relatedTarget as Node | null - if (next && event.currentTarget.contains(next)) return - setActiveDropMode(null) - }} - onDrop={(event) => { - event.preventDefault() - setActiveDropMode(null) - const frameIndex = Number(event.dataTransfer.getData(SOURCE_KEYFRAME_DRAG_TYPE)) - const frame = frames.find((item) => item.index === frameIndex) - if (frame) addConversionFrame(mode, frame) - }} - > -
-
-
{modeConfig.label}
-
{modeConfig.subtitle}
-
- - {modeFrames.length}/{RECONSTRUCTION_FRAME_LIMIT} - -
-
- {modeFrames.map((frame, index) => ( -
- {String(index + 1).padStart(2, "0")}} - /> - -
- ))} - {!modeFrames.length ? ( -
- {mode === "custom" ? "可只写描述,也可拖入参考。" : "把参考帧拖到这里。"} -
- ) : null} -
- {mode === "cartoon" ? ( -
- - {cartoonStyleOpen ? ( -
- {CARTOON_RECONSTRUCTION_STYLES.map((style) => ( - - ))} -
- ) : null} -
- ) : null} -