auto-save 2026-05-18 15:07 (~5)

This commit is contained in:
2026-05-18 15:08:05 +08:00
parent 47653ee319
commit ebac2e86b5
5 changed files with 40 additions and 24 deletions

View File

@@ -238,8 +238,8 @@ JobStatus = Literal[
"transcribing", "transcribed", "failed",
]
KEYFRAME_COUNT = int(os.getenv("KEYFRAME_COUNT", "12"))
FrameExtractTarget = Literal["transparent_human", "balanced", "subject", "transition", "expression", "motion"]
KEYFRAME_COUNT = int(os.getenv("KEYFRAME_COUNT", "6"))
FrameExtractTarget = Literal["random_subject", "transparent_human", "balanced", "subject", "transition", "expression", "motion"]
# Extraction mode literal. NOTE(review): presumably "replace" discards a job's existing
# frames and "append" adds to them — confirm against the analyze handler.
FrameExtractMode = Literal["replace", "append"]
# Named quality presets accepted for a frame-extraction run.
FrameExtractQuality = Literal["auto", "fast", "accurate", "ultra"]
# Positional record describing one analyze request: (str, int, target, mode, quality).
# NOTE(review): the leading str/int slots look like a job id and a frame count — confirm.
AnalyzeTask = tuple[str, int, FrameExtractTarget, FrameExtractMode, FrameExtractQuality]
@@ -252,6 +252,7 @@ SceneMode = Literal["remove_subject", "similar", "style"]
SceneStyle = Literal["source", "premium_product", "clean_studio", "warm_lifestyle", "cinematic"]
SceneAssetRole = Literal["scene", "first_frame", "last_frame"]
FRAME_TARGET_LABELS: dict[FrameExtractTarget, str] = {
"random_subject": "人物随机",
"transparent_human": "透明骨架人",
"balanced": "综合关键帧",
"subject": "清晰主体",
@@ -1408,7 +1409,10 @@ def _target_score(item: dict, target: FrameExtractTarget) -> float:
scene = float(item.get("scene_score_n", 0.0))
motion = float(item.get("motion_n", 0.0))
if target == "transparent_human":
if target == "random_subject":
# 人物定向随机抽帧先用中心主体/清晰度形成候选池,再在池内随机取样。
score = center * 0.52 + sharp * 0.24 + contrast * 0.14 + color * 0.10
elif target == "transparent_human":
# 当前抽帧阶段走本地算力:优先清晰中心主体、高对比、适度色彩和时间覆盖。
# 透明骨架人的语义判断留给后续审核/识别,不在抽帧阶段逐帧调用 Vision。
score = center * 0.45 + sharp * 0.30 + contrast * 0.15 + color * 0.10
@@ -1460,6 +1464,15 @@ def _select_keyframes(candidates: list[dict], n: int, target: FrameExtractTarget
elif it["score"] > dup["score"]:
deduped[deduped.index(dup)] = it
if target == "random_subject":
# 人物定向随机:从清晰、中心主体更强的候选池里随机抽,不再按动作峰值排序。
ranked = sorted(deduped, key=lambda x: -float(x.get("score", 0.0)))
pool_size = min(len(ranked), max(n * 6, n + 8))
pool = ranked[:pool_size] if pool_size > 0 else ranked
selected = random.sample(pool, k=min(n, len(pool))) if len(pool) > n else list(pool)
selected.sort(key=lambda x: x["idx"])
return selected
# 时序分桶:把候选时间轴等分 n 段,每段取当前目标下最优的
total = len(candidates)
buckets: list[list[dict]] = [[] for _ in range(n)]

View File

@@ -40,6 +40,7 @@ const VIDEO_FRAME_PANEL_ID = "video-frame-panel"
const FLOATING_PANEL_IDS = new Set([KEYFRAME_PANEL_ID, VIDEO_FRAME_PANEL_ID])
const DIRECT_VIDEO_GENERATION_PAUSED = true
const FRAME_TARGET_LABELS: Record<FrameExtractTarget, string> = {
random_subject: "人物随机",
transparent_human: "透明骨架人",
balanced: "综合关键帧",
subject: "清晰主体",
@@ -242,8 +243,8 @@ export default function Home() {
const handleAnalyzeJob = useCallback(async (jobId: string, options?: { mode?: FrameExtractMode }) => {
const targetJob = jobs.find((item) => item.id === jobId)
if (!targetJob) return
const frameTarget = frameTargets[jobId] ?? "transparent_human"
const frameCount = frameCounts[jobId] ?? 12
const frameTarget = frameTargets[jobId] ?? "random_subject"
const frameCount = frameCounts[jobId] ?? 6
const frameQuality = frameQualities[jobId] ?? "auto"
const mode = options?.mode ?? (targetJob.frames.length > 0 ? "append" : "replace")
setActiveJobId(jobId)
@@ -487,8 +488,8 @@ export default function Home() {
const visualRunning = target.status === "splitting"
if (!hasVisualResult && !visualRunning && !autoTriggeredRef.current.has(visualKey)) {
autoTriggeredRef.current.add(visualKey)
const frameTarget = frameTargets[target.id] ?? "motion"
const frameCount = frameCounts[target.id] ?? 12
const frameTarget = frameTargets[target.id] ?? "random_subject"
const frameCount = frameCounts[target.id] ?? 6
const frameQuality = frameQualities[target.id] ?? "accurate"
try {
const updated = await analyzeJob(target.id, frameCount, frameTarget, "replace", frameQuality)

View File

@@ -52,6 +52,7 @@ import { type NodeData } from "@/components/nodes"
import { MediaAssetTile } from "@/components/media-asset-tile"
const TARGETS: Array<{ value: FrameExtractTarget; label: string }> = [
{ value: "random_subject", label: "人物随机" },
{ value: "balanced", label: "综合" },
{ value: "subject", label: "主体" },
{ value: "motion", label: "动作" },
@@ -1875,11 +1876,11 @@ function SourceReferenceBuildPanel({
for (const frame of job.frames) {
if (selectedFrames.has(frame.index)) onToggleFrame(frame.index)
}
const updated = await analyzeJob(job.id, 12, "motion", "replace", "accurate")
const updated = await analyzeJob(job.id, 6, "random_subject", "replace", "accurate")
onJobUpdate(updated)
toast.info("已按动作峰值逻辑重新抽取 12 张参考帧,完成后在这里人工选择主角参考。")
toast.info("已按人物定向随机逻辑重新抽取 6 张参考帧,完成后在这里人工选择主角参考。")
} catch (e) {
toast.error("12 张关键帧抽取失败:" + (e instanceof Error ? e.message : String(e)))
toast.error("6 张关键帧抽取失败:" + (e instanceof Error ? e.message : String(e)))
} finally {
setExtracting(false)
}
@@ -1887,7 +1888,7 @@ function SourceReferenceBuildPanel({
const generateSimilarActor = async () => {
if (!frames.length) {
toast.warning("请先自动抽帧 12 张,或在原版视频上手动补帧。")
toast.warning("请先自动抽帧 6 张,或在原版视频上手动补帧。")
return
}
const baseFrame = subjectReferenceFrames[0]
@@ -2000,11 +2001,11 @@ function SourceReferenceBuildPanel({
type="button"
onClick={() => void extractKeyframes()}
disabled={!job.video_url || extracting || job.status === "splitting"}
title="自动按动作峰值抽 12 张参考帧,更偏向手势、表情变化、节奏点和镜头变化"
title="自动按人物定向随机逻辑抽 6 张参考帧,保留手动当前点补帧"
className="inline-flex h-8 items-center justify-center gap-1 rounded-md bg-white px-3 text-[11px] font-semibold text-black transition hover:bg-white/90 disabled:cursor-not-allowed disabled:opacity-40"
>
{extracting || job.status === "splitting" ? <Loader2 className="h-3.5 w-3.5 animate-spin" /> : <Scissors className="h-3.5 w-3.5" />}
12
6
</button>
</div>
</div>
@@ -2039,7 +2040,7 @@ function SourceReferenceBuildPanel({
})}
{!frames.length && (
<div className="col-span-full flex h-[106px] items-center justify-center rounded border border-dashed border-white/12 text-[11px] text-white/34">
12
6
</div>
)}
</div>
@@ -3405,7 +3406,7 @@ function FrameExtractControls({
</div>
<div className="grid grid-cols-[1fr_1fr_72px] gap-2">
<select
value={job ? data.frameTargets[job.id] ?? "transparent_human" : "balanced"}
value={job ? data.frameTargets[job.id] ?? "random_subject" : "random_subject"}
onChange={(e) => job && data.onFrameTargetChange(job.id, e.target.value as FrameExtractTarget)}
disabled={!job}
className={controlClass}
@@ -3424,8 +3425,8 @@ function FrameExtractControls({
type="number"
min={1}
max={20}
value={job ? data.frameCounts[job.id] ?? 12 : 12}
onChange={(e) => job && data.onFrameCountChange(job.id, Number(e.target.value) || 12)}
value={job ? data.frameCounts[job.id] ?? 6 : 6}
onChange={(e) => job && data.onFrameCountChange(job.id, Number(e.target.value) || 6)}
disabled={!job}
className={`${controlClass} text-center`}
/>

View File

@@ -133,6 +133,7 @@ function clamp(value: number, min: number, max: number) {
// Fixed thumbnail height; the thumbnail width is derived from it and the job's
// width/height ratio. Presumably in CSS pixels — TODO confirm.
const THUMBNAIL_HEIGHT = 192
// NOTE(review): looks like the gap kept between a floating panel and the canvas edge —
// usage is not visible in this view, confirm before relying on it.
const FLOATING_PANEL_EDGE_INSET = 8
const FRAME_TARGET_OPTIONS: Array<{ value: FrameExtractTarget; label: string; hint: string }> = [
{ value: "random_subject", label: "人物随机", hint: "从清晰人物候选里随机抽取" },
{ value: "transparent_human", label: "透明骨架人", hint: "本地算力筛清晰主体,不逐帧调用 Vision" },
{ value: "balanced", label: "综合关键帧", hint: "清晰、去重、变化、时间覆盖" },
{ value: "subject", label: "清晰主体", hint: "人物 / 产品主体更清楚" },
@@ -140,7 +141,7 @@ const FRAME_TARGET_OPTIONS: Array<{ value: FrameExtractTarget; label: string; hi
{ value: "expression", label: "表情瞬间", hint: "人物 / 动物表情倾向" },
{ value: "motion", label: "动作峰值", hint: "动作变化更明显" },
]
const FRAME_COUNT_OPTIONS = [12, 8, 5, 3]
// Preset frame counts, with 6 listed first. NOTE(review): presumably rendered in
// this order by the count picker — confirm against the consumer.
const FRAME_COUNT_OPTIONS = [6, 12, 8, 5, 3]
const FRAME_QUALITY_OPTIONS: Array<{ value: FrameExtractQuality; label: string; hint: string }> = [
{ value: "auto", label: "自动", hint: "展示友好:按电脑性能选择,最高只到精细" },
{ value: "fast", label: "快速", hint: "2fps / 360px长视频省电" },
@@ -575,8 +576,8 @@ export function InputNode({ data, selected }: NodeProps<{ data: NodeData }> | an
const aspectStr = ready ? `${j.width}/${j.height}` : "9/16"
const thumbNaturalWidth = ready && j.height ? Math.max(96, Math.round(THUMBNAIL_HEIGHT * j.width / j.height)) : 96
const toolWidth = Math.max(148, thumbNaturalWidth)
const target = d.frameTargets[j.id] ?? "transparent_human"
const count = d.frameCounts[j.id] ?? 12
const target = d.frameTargets[j.id] ?? "random_subject"
const count = d.frameCounts[j.id] ?? 6
const quality = d.frameQualities[j.id] ?? "auto"
const jHasFrames = j.frames.length > 0
const jRunning = ["splitting", "transcribing"].includes(j.status)
@@ -815,8 +816,8 @@ export function VideoFramePanelNode({ data }: any) {
const duration = panelJob.duration ?? 0
const frames = [...panelJob.frames].sort((a, b) => a.timestamp - b.timestamp)
const aspect = panelJob.width && panelJob.height ? `${panelJob.width}/${panelJob.height}` : "9/16"
const panelTarget = d.frameTargets[panelJob.id] ?? "transparent_human"
const panelCount = d.frameCounts[panelJob.id] ?? 12
const panelTarget = d.frameTargets[panelJob.id] ?? "random_subject"
const panelCount = d.frameCounts[panelJob.id] ?? 6
const panelQuality = d.frameQualities[panelJob.id] ?? "auto"
const panelRunning = ["splitting", "transcribing"].includes(panelJob.status)
const dockText: Record<CanvasPanelDock, string> = {

View File

@@ -417,7 +417,7 @@ export interface KeyFrame {
generated_images?: GeneratedImage[]
}
export type FrameExtractTarget = "transparent_human" | "balanced" | "subject" | "transition" | "expression" | "motion"
/** Identifiers of the frame-extraction targets a caller can request. */
export type FrameExtractTarget = "random_subject" | "transparent_human" | "balanced" | "subject" | "transition" | "expression" | "motion"
/**
 * Extraction mode. NOTE(review): presumably "replace" overwrites a job's existing
 * frames and "append" adds to them — confirm against the backend.
 */
export type FrameExtractMode = "replace" | "append"
/** Named quality presets for a frame-extraction run. */
export type FrameExtractQuality = "auto" | "fast" | "accurate" | "ultra"
export type AssetBackground = "white" | "black"
@@ -691,7 +691,7 @@ export async function triggerTranscribe(id: string): Promise<Job> {
export async function analyzeJob(
id: string,
frames = 12,
frames = 6,
target: FrameExtractTarget = "balanced",
mode: FrameExtractMode = "replace",
quality: FrameExtractQuality = "auto",