diff --git a/api/main.py b/api/main.py index 40eb343..37a3e68 100644 --- a/api/main.py +++ b/api/main.py @@ -238,8 +238,8 @@ JobStatus = Literal[ "transcribing", "transcribed", "failed", ] -KEYFRAME_COUNT = int(os.getenv("KEYFRAME_COUNT", "12")) -FrameExtractTarget = Literal["transparent_human", "balanced", "subject", "transition", "expression", "motion"] +KEYFRAME_COUNT = int(os.getenv("KEYFRAME_COUNT", "6")) +FrameExtractTarget = Literal["random_subject", "transparent_human", "balanced", "subject", "transition", "expression", "motion"] FrameExtractMode = Literal["replace", "append"] FrameExtractQuality = Literal["auto", "fast", "accurate", "ultra"] AnalyzeTask = tuple[str, int, FrameExtractTarget, FrameExtractMode, FrameExtractQuality] @@ -252,6 +252,7 @@ SceneMode = Literal["remove_subject", "similar", "style"] SceneStyle = Literal["source", "premium_product", "clean_studio", "warm_lifestyle", "cinematic"] SceneAssetRole = Literal["scene", "first_frame", "last_frame"] FRAME_TARGET_LABELS: dict[FrameExtractTarget, str] = { + "random_subject": "人物随机", "transparent_human": "透明骨架人", "balanced": "综合关键帧", "subject": "清晰主体", @@ -1408,7 +1409,10 @@ def _target_score(item: dict, target: FrameExtractTarget) -> float: scene = float(item.get("scene_score_n", 0.0)) motion = float(item.get("motion_n", 0.0)) - if target == "transparent_human": + if target == "random_subject": + # 人物定向随机抽帧先用中心主体/清晰度形成候选池,再在池内随机取样。 + score = center * 0.52 + sharp * 0.24 + contrast * 0.14 + color * 0.10 + elif target == "transparent_human": # 当前抽帧阶段走本地算力:优先清晰中心主体、高对比、适度色彩和时间覆盖。 # 透明骨架人的语义判断留给后续审核/识别,不在抽帧阶段逐帧调用 Vision。 score = center * 0.45 + sharp * 0.30 + contrast * 0.15 + color * 0.10 @@ -1460,6 +1464,15 @@ def _select_keyframes(candidates: list[dict], n: int, target: FrameExtractTarget elif it["score"] > dup["score"]: deduped[deduped.index(dup)] = it + if target == "random_subject": + # 人物定向随机:从清晰、中心主体更强的候选池里随机抽,不再按动作峰值排序。 + ranked = sorted(deduped, key=lambda x: -float(x.get("score", 0.0))) + pool_size = min(len(ranked), max(n * 6, n + 8)) + pool = ranked[:pool_size] if pool_size > 0 else ranked + selected = random.sample(pool, k=min(n, len(pool))) if len(pool) > n else list(pool) + selected.sort(key=lambda x: x["idx"]) + return selected + # 时序分桶:把候选时间轴等分 n 段,每段取当前目标下最优的 total = len(candidates) buckets: list[list[dict]] = [[] for _ in range(n)] diff --git a/web/app/page.tsx b/web/app/page.tsx index 33360e7..ba51654 100644 --- a/web/app/page.tsx +++ b/web/app/page.tsx @@ -40,6 +40,7 @@ const VIDEO_FRAME_PANEL_ID = "video-frame-panel" const FLOATING_PANEL_IDS = new Set([KEYFRAME_PANEL_ID, VIDEO_FRAME_PANEL_ID]) const DIRECT_VIDEO_GENERATION_PAUSED = true const FRAME_TARGET_LABELS: Record = { + random_subject: "人物随机", transparent_human: "透明骨架人", balanced: "综合关键帧", subject: "清晰主体", @@ -242,8 +243,8 @@ export default function Home() { const handleAnalyzeJob = useCallback(async (jobId: string, options?: { mode?: FrameExtractMode }) => { const targetJob = jobs.find((item) => item.id === jobId) if (!targetJob) return - const frameTarget = frameTargets[jobId] ?? "transparent_human" - const frameCount = frameCounts[jobId] ?? 12 + const frameTarget = frameTargets[jobId] ?? "random_subject" + const frameCount = frameCounts[jobId] ?? 6 const frameQuality = frameQualities[jobId] ?? "auto" const mode = options?.mode ?? (targetJob.frames.length > 0 ? "append" : "replace") setActiveJobId(jobId) @@ -487,8 +488,8 @@ export default function Home() { const visualRunning = target.status === "splitting" if (!hasVisualResult && !visualRunning && !autoTriggeredRef.current.has(visualKey)) { autoTriggeredRef.current.add(visualKey) - const frameTarget = frameTargets[target.id] ?? "motion" - const frameCount = frameCounts[target.id] ?? 12 + const frameTarget = frameTargets[target.id] ?? "random_subject" + const frameCount = frameCounts[target.id] ?? 6 const frameQuality = frameQualities[target.id] ?? "accurate" try { const updated = await analyzeJob(target.id, frameCount, frameTarget, "replace", frameQuality) diff --git a/web/components/ad-recreation-board.tsx b/web/components/ad-recreation-board.tsx index c347fd0..0011b2b 100644 --- a/web/components/ad-recreation-board.tsx +++ b/web/components/ad-recreation-board.tsx @@ -52,6 +52,7 @@ import { type NodeData } from "@/components/nodes" import { MediaAssetTile } from "@/components/media-asset-tile" const TARGETS: Array<{ value: FrameExtractTarget; label: string }> = [ + { value: "random_subject", label: "人物随机" }, { value: "balanced", label: "综合" }, { value: "subject", label: "主体" }, { value: "motion", label: "动作" }, @@ -1875,11 +1876,11 @@ function SourceReferenceBuildPanel({ for (const frame of job.frames) { if (selectedFrames.has(frame.index)) onToggleFrame(frame.index) } - const updated = await analyzeJob(job.id, 12, "motion", "replace", "accurate") + const updated = await analyzeJob(job.id, 6, "random_subject", "replace", "accurate") onJobUpdate(updated) - toast.info("已按动作峰值逻辑重新抽取 12 张参考帧,完成后在这里人工选择主角参考。") + toast.info("已按人物定向随机逻辑重新抽取 6 张参考帧,完成后在这里人工选择主角参考。") } catch (e) { - toast.error("12 张关键帧抽取失败:" + (e instanceof Error ? e.message : String(e))) + toast.error("6 张关键帧抽取失败:" + (e instanceof Error ? e.message : String(e))) } finally { setExtracting(false) } @@ -1887,7 +1888,7 @@ function SourceReferenceBuildPanel({ const generateSimilarActor = async () => { if (!frames.length) { - toast.warning("请先自动抽帧 12 张,或在原版视频上手动补帧。") + toast.warning("请先自动抽帧 6 张,或在原版视频上手动补帧。") return } const baseFrame = subjectReferenceFrames[0] @@ -2000,11 +2001,11 @@ function SourceReferenceBuildPanel({ type="button" onClick={() => void extractKeyframes()} disabled={!job.video_url || extracting || job.status === "splitting"} - title="自动按动作峰值抽 12 张参考帧,更偏向手势、表情变化、节奏点和镜头变化" + title="自动按人物定向随机逻辑抽 6 张参考帧,保留手动当前点补帧" className="inline-flex h-8 items-center justify-center gap-1 rounded-md bg-white px-3 text-[11px] font-semibold text-black transition hover:bg-white/90 disabled:cursor-not-allowed disabled:opacity-40" > {extracting || job.status === "splitting" ? : } - 自动抽帧 12 张 + 自动抽帧 6 张 @@ -2039,7 +2040,7 @@ function SourceReferenceBuildPanel({ })} {!frames.length && (
- 点击“自动抽帧 12 张”,或在原版视频播放器上用“当前点抽帧”补充人物参考。 + 点击“自动抽帧 6 张”,或在原版视频播放器上用“当前点抽帧”补充人物参考。
)} @@ -3405,7 +3406,7 @@ function FrameExtractControls({