From f2663eb90e178a0cf04864d7b442564cfd974d23 Mon Sep 17 00:00:00 2001 From: kang Date: Thu, 14 May 2026 05:05:22 +0800 Subject: [PATCH] auto-save 2026-05-14 05:05 (~6) --- .memory/worklog.json | 13 ++ api/main.py | 47 +++++-- docs/source-analysis.html | 28 +++- web/components/lightbox.tsx | 231 ++++++++++++++++++++++++++++++++- web/components/nodes/index.tsx | 4 +- web/lib/api.ts | 6 +- 6 files changed, 312 insertions(+), 17 deletions(-) diff --git a/.memory/worklog.json b/.memory/worklog.json index 0ea53f1..192c7da 100644 --- a/.memory/worklog.json +++ b/.memory/worklog.json @@ -3283,6 +3283,19 @@ "type": "session-heartbeat", "message": "Codex 会话活跃 · 最近命令:codex · 2 项未提交变更 · 最近提交:auto-save 2026-05-14 04:54 (~1)", "files_changed": 2 + }, + { + "ts": "2026-05-14T04:59:53+08:00", + "type": "commit", + "message": "auto-save 2026-05-14 04:59 (~3)", + "hash": "f1f3a0f", + "files_changed": 3 + }, + { + "ts": "2026-05-13T21:03:13Z", + "type": "session-heartbeat", + "message": "Claude 会话活跃 · 最近命令:claude · 4 项未提交变更 · 最近提交:auto-save 2026-05-14 04:59 (~3)", + "files_changed": 4 } ] } diff --git a/api/main.py b/api/main.py index 3948db2..b23559e 100644 --- a/api/main.py +++ b/api/main.py @@ -96,7 +96,7 @@ AssetBackground = Literal["white", "black"] AssetSize = Literal["source", "1024", "1536", "2048"] AssetQuality = Literal["hd"] SubjectKind = Literal["object", "living"] -SubjectView = Literal["front", "back", "left", "right", "side", "side_walk", "top", "bottom", "expression"] +SubjectView = str FRAME_TARGET_LABELS: dict[FrameExtractTarget, str] = { "balanced": "综合关键帧", "subject": "清晰主体", @@ -690,16 +690,43 @@ def _make_reference_contact_sheet(job_id: str, frame_indices: list[int], out_pat return out_path -def _subject_view_labels(kind: SubjectKind) -> list[tuple[SubjectView, str]]: +SUBJECT_VIEW_LABELS: dict[str, str] = { + "front": "正面", + "back": "背面", + "left": "左侧", + "right": "右侧", + "side": "侧面", + "side_walk": "侧面走路", + "top": "顶部视角", + "bottom": "底部视角", + "expression_neutral": "中性表情", + "expression_happy": "开心表情", + "expression_angry": "生气表情", + "expression_sad": "难过表情", + "expression_relaxed": "放松表情", + "action_walk": "走路动作", + "action_sit": "坐姿动作", + "action_hold": "手持动作", + "action_use": "使用动作", +} + + +def _subject_view_labels(kind: SubjectKind, requested: list[str] | None = None) -> list[tuple[SubjectView, str]]: + if requested: + normalized: list[str] = [] + for raw in requested: + key = "".join(ch for ch in str(raw).strip().lower() if ch.isalnum() or ch == "_") + if key and key not in normalized: + normalized.append(key) + return [(key, SUBJECT_VIEW_LABELS.get(key, key.replace("_", " "))) for key in normalized[:12]] if kind == "living": return [ ("front", "正面站立"), ("back", "背面站立"), ("side", "侧面站立"), ("side_walk", "侧面走路"), - ("top", "顶部视角"), - ("bottom", "底部视角"), - ("expression", "表情参考"), + ("expression_neutral", "中性表情"), + ("expression_relaxed", "放松表情"), ] return [ ("front", "正面"), @@ -1911,6 +1938,7 @@ class GenerateSubjectAssetsReq(BaseModel): quality: AssetQuality = "hd" size: AssetSize = "source" source_frame_indices: list[int] | None = None + views: list[str] | None = None @app.post("/jobs/{job_id}/frames/{idx}/elements", response_model=Job) @@ -2205,11 +2233,14 @@ def generate_subject_assets(job_id: str, idx: int, element_id: str, req: Generat models = [IMAGE_MODEL, "gemini-3.1-flash-image-preview", "gemini-2.5-flash-image"] generated: list[SubjectAsset] = [] try: - for view, view_label in _subject_view_labels(req.subject_kind): + for view, view_label in _subject_view_labels(req.subject_kind, req.views): if view == "side_walk": view_prompt = "side view in a natural walking pose, same identity and proportions" - elif view == "expression": - view_prompt = "clear expression reference, frontal or three-quarter standing pose, preserving the same identity" + elif view.startswith("expression_"): + emotion = view_label.replace("表情", "") + view_prompt = f"clear {emotion} facial expression reference, frontal or three-quarter standing pose, preserving the same identity" + elif view.startswith("action_"): + view_prompt = f"{view_label} reference pose, same identity and proportions" else: view_prompt = f"{view_label} view" prompt = ( diff --git a/docs/source-analysis.html b/docs/source-analysis.html index ed03041..f6e98ae 100644 --- a/docs/source-analysis.html +++ b/docs/source-analysis.html @@ -655,6 +655,8 @@ api/main.py index, timestamp, url, description, cleaned_url, cleaned_applied, + quality_report, + scene_assets: SceneAsset[], elements: KeyElement[], storyboard: StoryboardScene, generated_images: GeneratedImage[] @@ -669,7 +671,25 @@ api/main.py source: auto | manual | region, region, cutouts: string[], - cutout_id + cutout_id, + subject_kind: object | living, + subject_assets: SubjectAsset[] +} + +
+

SceneAsset / SubjectAsset

+

画面工作台素材准备阶段生成的组图资产。实际图片保存在 jobs/<jobId>/assets,可作为 asset 类型复制到分镜槽位。

+
SceneAsset {
+  id, label, url,
+  width, height, quality, size,
+  quality_report
+}
+
+SubjectAsset {
+  id, view, label, url,
+  background: white | black,
+  width, height, size,
+  source_frame_indices[]
 }
@@ -708,6 +728,8 @@ api/main.py 应用清洗POST /cleanup/applyapplyCleanedFrame物理覆盖 frames/{idx}.jpg,并备份原图。 元素增改删POST/PATCH/DELETE /elementsaddElement/updateElement/deleteElement让用户修正 Vision 错误,避免候选结果锁死。 元素提取POST /elements/{element_id}/cutoutcutoutElement调用图像模型生成独立白底素材图,每次累积一张 cutout。 + 场景资产POST /frames/{idx}/scene-assetgenerateSceneAsset为每张已选关键帧生成一张去水印、高清增强的场景图,保留历史版本用于人工审核。 + 主体资产包POST /elements/{element_id}/subject-assetsgenerateSubjectAssets根据用户选择的视图、动作和表情生成主体资产包;当多个关键帧都指向同一主体时,前端把已选关键帧作为 source_frame_indices 传入,后端拼参考板。 分镜保存PUT /frames/{idx}/storyboardupdateStoryboard保存 4 图槽、时长和改造说明。 生图POST /frames/{idx}/generategenerateImage基于关键帧或已选生成图做 image-to-image,目前可用。 @@ -729,9 +751,9 @@ api/main.py 画面工作台 Visual Lab - 在一个画布卡片里展示关键帧、元素 cutout 和视频任务;点击缩略图进入对应处理面板。 + 在一个画布卡片里展示关键帧、元素 cutout、场景图、主体资产包和视频任务状态;点击缩略图进入对应处理面板。关键帧详情面板负责生成场景资产和主体多视角/动作/表情资产。 不要在主卡片里堆复杂表单;主卡片只做状态总览和入口。 - VisualLabNodeFrameLightboxStoryboardWorkbench、视频任务接口 + VisualLabNodeFrameLightboxgenerateSceneAssetgenerateSubjectAssets、视频任务接口 分镜工作台 diff --git a/web/components/lightbox.tsx b/web/components/lightbox.tsx index 0914354..7d00f36 100644 --- a/web/components/lightbox.tsx +++ b/web/components/lightbox.tsx @@ -3,10 +3,10 @@ import { useEffect, useRef, useState } from "react" import { createPortal } from "react-dom" import { X, ChevronLeft, ChevronRight, Check, Sparkles, Wand2, Loader2, Eye, RefreshCw, Plus, Sparkle, Crop, Copy, PencilLine, Trash2, Save } from "lucide-react" import { - frameUrl, cleanedFrameUrl, cutoutUrl, + frameUrl, cleanedFrameUrl, cutoutUrl, apiAssetUrl, describeFrame, cleanupFrame, applyCleanedFrame, discardCleanedFrame, addElement, updateElement, deleteElement, cutoutElement, deleteCutout, - pushStoryboardImage, - type KeyFrame, type Job, type ImageRef, + pushStoryboardImage, generateSceneAsset, generateSubjectAssets, + type AssetBackground, type AssetSize, type KeyFrame, type Job, type ImageRef, type SubjectKind, } from "@/lib/api" import { toast } from "sonner" @@ -24,13 +24,42 @@ interface Props { embedded?: boolean } +const OBJECT_VIEW_OPTIONS = [ + ["front", "正面"], + ["back", "背面"], + ["left", "左侧"], + ["right", "右侧"], + ["top", "顶部"], + ["bottom", "底部"], +] + +const LIVING_VIEW_OPTIONS = [ + ["front", "正面"], + ["back", "背面"], + ["side", "侧面"], + ["side_walk", "走路"], + ["expression_happy", "喜"], + ["expression_angry", "怒"], + ["expression_sad", "哀"], + ["expression_relaxed", "乐/放松"], + ["action_sit", "坐"], + ["action_hold", "手持"], + ["action_use", "使用"], +] + export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, onChange, onToggleSelect, onJobUpdate, onSwitchPanel, onCopyImage, embedded = false }: Props) { const [describing, setDescribing] = useState(false) const [cleaning, setCleaning] = useState(false) const [applying, setApplying] = useState(false) const [cuttingId, setCuttingId] = useState(null) + const [sceneGenerating, setSceneGenerating] = useState(false) + const [subjectGenerating, setSubjectGenerating] = useState(null) const [addingZh, setAddingZh] = useState(false) const [addInput, setAddInput] = useState("") + const [assetSize, setAssetSize] = useState("source") + const [subjectKinds, setSubjectKinds] = useState>({}) + const [subjectBackgrounds, setSubjectBackgrounds] = useState>({}) + const [subjectViews, setSubjectViews] = useState>({}) const [editingElement, setEditingElement] = useState<{ id: string name_zh: string @@ -87,6 +116,9 @@ export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, o const desc = f.description const elements = f.elements ?? [] const hasCleaned = !!f.cleaned_url + const latestSceneAsset = f.scene_assets?.[f.scene_assets.length - 1] ?? null + const selectedFrameIndices = Array.from(selected).sort((a, b) => a - b) + const sharedSubjectFrameIndices = selectedFrameIndices.length > 1 ? selectedFrameIndices : [f.index] const handleDescribe = async () => { setDescribing(true) @@ -116,6 +148,50 @@ export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, o } } + const handleGenerateSceneAsset = async () => { + setSceneGenerating(true) + try { + const updated = await generateSceneAsset(jobId, f.index, { size: assetSize }) + onJobUpdate?.(updated) + toast.success(`分镜 ${f.index + 1} 场景图已生成`) + } catch (e) { + toast.error("场景图生成失败:" + (e instanceof Error ? e.message : String(e))) + } finally { + setSceneGenerating(false) + } + } + + const handleGenerateSubjectPackage = async (elementId: string) => { + const kind = subjectKinds[elementId] ?? "object" + const defaultViews = (kind === "living" ? LIVING_VIEW_OPTIONS : OBJECT_VIEW_OPTIONS).map(([value]) => value) + const views = subjectViews[elementId]?.length ? subjectViews[elementId] : defaultViews + setSubjectGenerating(elementId) + try { + const updated = await generateSubjectAssets(jobId, f.index, elementId, { + subject_kind: kind, + background: subjectBackgrounds[elementId] ?? "white", + size: assetSize, + source_frame_indices: sharedSubjectFrameIndices, + views, + }) + onJobUpdate?.(updated) + toast.success(`主体资产包已生成 · ${views.length} 张`) + } catch (e) { + toast.error("主体资产包生成失败:" + (e instanceof Error ? e.message : String(e))) + } finally { + setSubjectGenerating(null) + } + } + + const toggleSubjectView = (elementId: string, view: string, kind: SubjectKind) => { + const defaults = (kind === "living" ? LIVING_VIEW_OPTIONS : OBJECT_VIEW_OPTIONS).map(([value]) => value) + setSubjectViews((prev) => { + const current = prev[elementId] ?? defaults + const next = current.includes(view) ? current.filter((x) => x !== view) : [...current, view] + return { ...prev, [elementId]: next } + }) + } + const handleExtractRegion = async () => { // 提取语义只在恰好 1 个框时支持 if (regions.length !== 1 || !extractName.trim()) return @@ -548,6 +624,56 @@ export function FrameLightbox({ jobId, frames, activeIndex, selected, onClose, o {cleaning ? "清洗中…(5-15 秒)" : hasCleaned ? "重新清洗" : f.cleaned_applied ? "再次清洗" : "🧹 清洗水印"} +
+
+
场景图
+ +
+ {latestSceneAsset ? ( +
+ {latestSceneAsset.label} +
+ {latestSceneAsset.width}×{latestSceneAsset.height} + {onCopyImage && ( + + )} +
+
+ ) : null} + {latestSceneAsset?.quality_report?.warnings?.length ? ( +
+ {latestSceneAsset.quality_report.warnings[0]} +
+ ) : null} + +
+ +
+
+ {viewOptions.map(([value, label]) => { + const active = activeViews.includes(value) + return ( + + ) + })} +
+ {subjectAssets.length > 0 && ( +
+ {subjectAssets.slice(-12).map((asset) => ( +
+ {asset.label} +
+ {asset.label.replace(`${e.name_zh} · `, "")} +
+
+ {onCopyImage && ( + + )} +
+
+ ))} +
+ )} + ) })} diff --git a/web/components/nodes/index.tsx b/web/components/nodes/index.tsx index df82a5a..a84cb01 100644 --- a/web/components/nodes/index.tsx +++ b/web/components/nodes/index.tsx @@ -1177,6 +1177,8 @@ export function VisualLabNode({ data, selected }: any) { const elementCrops = collectElementCrops(job) const cleanedCount = frames.filter((x) => x.cleaned_url).length const cutoutCount = frames.reduce((s, x) => s + (x.elements?.filter((e) => hasCutout(e)).length ?? 0), 0) + const sceneAssetCount = frames.reduce((s, x) => s + (x.scene_assets?.length ?? 0), 0) + const subjectAssetCount = frames.reduce((s, x) => s + (x.elements?.reduce((n, e) => n + (e.subject_assets?.length ?? 0), 0) ?? 0), 0) const runningVideo = videos.some((v) => v.status === "queued" || v.status === "in_progress") const completedVideos = videos.filter((v) => v.status === "completed" && v.url) const failedVideo = videos.some((v) => v.status === "failed") @@ -1485,7 +1487,7 @@ export function VisualLabNode({ data, selected }: any) {
{frames.length > 0 ? ( <> - {cleanedCount} 已清洗 · {cutoutCount} 已抠图 · {d.selectedFrames.size}/{frames.length} 入编排 · {completedVideos.length} 已完成 + {cleanedCount} 已清洗 · {sceneAssetCount} 场景图 · {subjectAssetCount || cutoutCount} 主体素材 · {d.selectedFrames.size}/{frames.length} 入编排 · {completedVideos.length} 已完成 ) : ( "解析后这里展示关键帧、元素和视频任务;具体处理仍在点击后的工作台完成。" diff --git a/web/lib/api.ts b/web/lib/api.ts index e82e5b6..7a1cea2 100644 --- a/web/lib/api.ts +++ b/web/lib/api.ts @@ -138,7 +138,7 @@ export type FrameExtractQuality = "auto" | "fast" | "accurate" | "ultra" export type AssetBackground = "white" | "black" export type AssetSize = "source" | "1024" | "1536" | "2048" export type SubjectKind = "object" | "living" -export type SubjectView = "front" | "back" | "left" | "right" | "side" | "side_walk" | "top" | "bottom" | "expression" +export type SubjectView = string export interface QualityReport { width: number @@ -441,7 +441,7 @@ export function representativeCutoutUrl( export async function pushStoryboardImage( jobId: string, - body: { kind: "keyframe" | "cutout"; frame_idx: number; element_id?: string | null; cutout_id?: string | null; label?: string }, + body: { kind: "keyframe" | "cutout" | "asset"; frame_idx: number; element_id?: string | null; cutout_id?: string | null; label?: string }, ): Promise { const res = await fetch(`${API_BASE}/jobs/${jobId}/storyboard-images`, { method: "POST", @@ -676,6 +676,7 @@ export async function generateSubjectAssets( background?: AssetBackground size?: AssetSize source_frame_indices?: number[] + views?: string[] } = {}, ): Promise { const res = await fetch(`${API_BASE}/jobs/${jobId}/frames/${frameIdx}/elements/${elementId}/subject-assets`, { @@ -687,6 +688,7 @@ export async function generateSubjectAssets( background: body.background ?? "white", size: body.size ?? "source", source_frame_indices: body.source_frame_indices ?? null, + views: body.views ?? null, }), }) if (!res.ok) {