auto-save 2026-05-18 15:07 (~5)

2026-05-18 15:08:05 +08:00
parent 47653ee319
commit ebac2e86b5
5 changed files with 40 additions and 24 deletions
--- a/api/main.py
+++ b/api/main.py
@@ -238,8 +238,8 @@ JobStatus = Literal[
    "transcribing", "transcribed", "failed",
 ]

-KEYFRAME_COUNT = int(os.getenv("KEYFRAME_COUNT", "12"))
-FrameExtractTarget = Literal["transparent_human", "balanced", "subject", "transition", "expression", "motion"]
+KEYFRAME_COUNT = int(os.getenv("KEYFRAME_COUNT", "6"))
+FrameExtractTarget = Literal["random_subject", "transparent_human", "balanced", "subject", "transition", "expression", "motion"]
 FrameExtractMode = Literal["replace", "append"]
 FrameExtractQuality = Literal["auto", "fast", "accurate", "ultra"]
 AnalyzeTask = tuple[str, int, FrameExtractTarget, FrameExtractMode, FrameExtractQuality]
@@ -252,6 +252,7 @@ SceneMode = Literal["remove_subject", "similar", "style"]
 SceneStyle = Literal["source", "premium_product", "clean_studio", "warm_lifestyle", "cinematic"]
 SceneAssetRole = Literal["scene", "first_frame", "last_frame"]
 FRAME_TARGET_LABELS: dict[FrameExtractTarget, str] = {
+    "random_subject": "人物随机",
    "transparent_human": "透明骨架人",
    "balanced": "综合关键帧",
    "subject": "清晰主体",
@@ -1408,7 +1409,10 @@ def _target_score(item: dict, target: FrameExtractTarget) -> float:
    scene = float(item.get("scene_score_n", 0.0))
    motion = float(item.get("motion_n", 0.0))

-    if target == "transparent_human":
+    if target == "random_subject":
+        # 人物定向随机抽帧先用中心主体/清晰度形成候选池，再在池内随机取样。
+        score = center * 0.52 + sharp * 0.24 + contrast * 0.14 + color * 0.10
+    elif target == "transparent_human":
        # 当前抽帧阶段走本地算力：优先清晰中心主体、高对比、适度色彩和时间覆盖。
        # 透明骨架人的语义判断留给后续审核/识别，不在抽帧阶段逐帧调用 Vision。
        score = center * 0.45 + sharp * 0.30 + contrast * 0.15 + color * 0.10
@@ -1460,6 +1464,15 @@ def _select_keyframes(candidates: list[dict], n: int, target: FrameExtractTarget
        elif it["score"] > dup["score"]:
            deduped[deduped.index(dup)] = it

+    if target == "random_subject":
+        # 人物定向随机：从清晰、中心主体更强的候选池里随机抽，不再按动作峰值排序。
+        ranked = sorted(deduped, key=lambda x: -float(x.get("score", 0.0)))
+        pool_size = min(len(ranked), max(n * 6, n + 8))
+        pool = ranked[:pool_size] if pool_size > 0 else ranked
+        selected = random.sample(pool, k=min(n, len(pool))) if len(pool) > n else list(pool)
+        selected.sort(key=lambda x: x["idx"])
+        return selected
+
    # 时序分桶：把候选时间轴等分 n 段，每段取当前目标下最优的
    total = len(candidates)
    buckets: list[list[dict]] = [[] for _ in range(n)]
--- a/web/app/page.tsx
+++ b/web/app/page.tsx
@@ -40,6 +40,7 @@ const VIDEO_FRAME_PANEL_ID = "video-frame-panel"
 const FLOATING_PANEL_IDS = new Set([KEYFRAME_PANEL_ID, VIDEO_FRAME_PANEL_ID])
 const DIRECT_VIDEO_GENERATION_PAUSED = true
 const FRAME_TARGET_LABELS: Record<FrameExtractTarget, string> = {
+  random_subject: "人物随机",
  transparent_human: "透明骨架人",
  balanced: "综合关键帧",
  subject: "清晰主体",
@@ -242,8 +243,8 @@ export default function Home() {
  const handleAnalyzeJob = useCallback(async (jobId: string, options?: { mode?: FrameExtractMode }) => {
    const targetJob = jobs.find((item) => item.id === jobId)
    if (!targetJob) return
-    const frameTarget = frameTargets[jobId] ?? "transparent_human"
-    const frameCount = frameCounts[jobId] ?? 12
+    const frameTarget = frameTargets[jobId] ?? "random_subject"
+    const frameCount = frameCounts[jobId] ?? 6
    const frameQuality = frameQualities[jobId] ?? "auto"
    const mode = options?.mode ?? (targetJob.frames.length > 0 ? "append" : "replace")
    setActiveJobId(jobId)
@@ -487,8 +488,8 @@ export default function Home() {
    const visualRunning = target.status === "splitting"
    if (!hasVisualResult && !visualRunning && !autoTriggeredRef.current.has(visualKey)) {
      autoTriggeredRef.current.add(visualKey)
-      const frameTarget = frameTargets[target.id] ?? "motion"
-      const frameCount = frameCounts[target.id] ?? 12
+      const frameTarget = frameTargets[target.id] ?? "random_subject"
+      const frameCount = frameCounts[target.id] ?? 6
      const frameQuality = frameQualities[target.id] ?? "accurate"
      try {
        const updated = await analyzeJob(target.id, frameCount, frameTarget, "replace", frameQuality)
--- a/web/components/ad-recreation-board.tsx
+++ b/web/components/ad-recreation-board.tsx
@@ -52,6 +52,7 @@ import { type NodeData } from "@/components/nodes"
 import { MediaAssetTile } from "@/components/media-asset-tile"

 const TARGETS: Array<{ value: FrameExtractTarget; label: string }> = [
+  { value: "random_subject", label: "人物随机" },
  { value: "balanced", label: "综合" },
  { value: "subject", label: "主体" },
  { value: "motion", label: "动作" },
@@ -1875,11 +1876,11 @@ function SourceReferenceBuildPanel({
      for (const frame of job.frames) {
        if (selectedFrames.has(frame.index)) onToggleFrame(frame.index)
      }
-      const updated = await analyzeJob(job.id, 12, "motion", "replace", "accurate")
+      const updated = await analyzeJob(job.id, 6, "random_subject", "replace", "accurate")
      onJobUpdate(updated)
-      toast.info("已按动作峰值逻辑重新抽取 12 张参考帧，完成后在这里人工选择主角参考。")
+      toast.info("已按人物定向随机逻辑重新抽取 6 张参考帧，完成后在这里人工选择主角参考。")
    } catch (e) {
-      toast.error("12 张关键帧抽取失败：" + (e instanceof Error ? e.message : String(e)))
+      toast.error("6 张关键帧抽取失败：" + (e instanceof Error ? e.message : String(e)))
    } finally {
      setExtracting(false)
    }
@@ -1887,7 +1888,7 @@ function SourceReferenceBuildPanel({

  const generateSimilarActor = async () => {
    if (!frames.length) {
-      toast.warning("请先自动抽帧 12 张，或在原版视频上手动补帧。")
+      toast.warning("请先自动抽帧 6 张，或在原版视频上手动补帧。")
      return
    }
    const baseFrame = subjectReferenceFrames[0]
@@ -2000,11 +2001,11 @@ function SourceReferenceBuildPanel({
            type="button"
            onClick={() => void extractKeyframes()}
            disabled={!job.video_url || extracting || job.status === "splitting"}
-            title="自动按动作峰值抽 12 张参考帧，更偏向手势、表情变化、节奏点和镜头变化"
+            title="自动按人物定向随机逻辑抽 6 张参考帧，保留手动当前点补帧"
            className="inline-flex h-8 items-center justify-center gap-1 rounded-md bg-white px-3 text-[11px] font-semibold text-black transition hover:bg-white/90 disabled:cursor-not-allowed disabled:opacity-40"
          >
            {extracting || job.status === "splitting" ? <Loader2 className="h-3.5 w-3.5 animate-spin" /> : <Scissors className="h-3.5 w-3.5" />}
-            自动抽帧 12 张
+            自动抽帧 6 张
          </button>
        </div>
      </div>
@@ -2039,7 +2040,7 @@ function SourceReferenceBuildPanel({
          })}
          {!frames.length && (
            <div className="col-span-full flex h-[106px] items-center justify-center rounded border border-dashed border-white/12 text-[11px] text-white/34">
-              点击“自动抽帧 12 张”，或在原版视频播放器上用“当前点抽帧”补充人物参考。
+              点击“自动抽帧 6 张”，或在原版视频播放器上用“当前点抽帧”补充人物参考。
            </div>
          )}
        </div>
@@ -3405,7 +3406,7 @@ function FrameExtractControls({
      </div>
      <div className="grid grid-cols-[1fr_1fr_72px] gap-2">
        <select
-          value={job ? data.frameTargets[job.id] ?? "transparent_human" : "balanced"}
+          value={job ? data.frameTargets[job.id] ?? "random_subject" : "random_subject"}
          onChange={(e) => job && data.onFrameTargetChange(job.id, e.target.value as FrameExtractTarget)}
          disabled={!job}
          className={controlClass}
@@ -3424,8 +3425,8 @@ function FrameExtractControls({
          type="number"
          min={1}
          max={20}
-          value={job ? data.frameCounts[job.id] ?? 12 : 12}
-          onChange={(e) => job && data.onFrameCountChange(job.id, Number(e.target.value) || 12)}
+          value={job ? data.frameCounts[job.id] ?? 6 : 6}
+          onChange={(e) => job && data.onFrameCountChange(job.id, Number(e.target.value) || 6)}
          disabled={!job}
          className={`${controlClass} text-center`}
        />
--- a/web/components/nodes/index.tsx
+++ b/web/components/nodes/index.tsx
@@ -133,6 +133,7 @@ function clamp(value: number, min: number, max: number) {
 const THUMBNAIL_HEIGHT = 192
 const FLOATING_PANEL_EDGE_INSET = 8
 const FRAME_TARGET_OPTIONS: Array<{ value: FrameExtractTarget; label: string; hint: string }> = [
+  { value: "random_subject", label: "人物随机", hint: "从清晰人物候选里随机抽取" },
  { value: "transparent_human", label: "透明骨架人", hint: "本地算力筛清晰主体，不逐帧调用 Vision" },
  { value: "balanced", label: "综合关键帧", hint: "清晰、去重、变化、时间覆盖" },
  { value: "subject", label: "清晰主体", hint: "人物 / 产品主体更清楚" },
@@ -140,7 +141,7 @@ const FRAME_TARGET_OPTIONS: Array<{ value: FrameExtractTarget; label: string; hi
  { value: "expression", label: "表情瞬间", hint: "人物 / 动物表情倾向" },
  { value: "motion", label: "动作峰值", hint: "动作变化更明显" },
 ]
-const FRAME_COUNT_OPTIONS = [12, 8, 5, 3]
+const FRAME_COUNT_OPTIONS = [6, 12, 8, 5, 3]
 const FRAME_QUALITY_OPTIONS: Array<{ value: FrameExtractQuality; label: string; hint: string }> = [
  { value: "auto", label: "自动", hint: "展示友好：按电脑性能选择，最高只到精细" },
  { value: "fast", label: "快速", hint: "2fps / 360px，长视频省电" },
@@ -575,8 +576,8 @@ export function InputNode({ data, selected }: NodeProps<{ data: NodeData }> | an
            const aspectStr = ready ? `${j.width}/${j.height}` : "9/16"
            const thumbNaturalWidth = ready && j.height ? Math.max(96, Math.round(THUMBNAIL_HEIGHT * j.width / j.height)) : 96
            const toolWidth = Math.max(148, thumbNaturalWidth)
-            const target = d.frameTargets[j.id] ?? "transparent_human"
-            const count = d.frameCounts[j.id] ?? 12
+            const target = d.frameTargets[j.id] ?? "random_subject"
+            const count = d.frameCounts[j.id] ?? 6
            const quality = d.frameQualities[j.id] ?? "auto"
            const jHasFrames = j.frames.length > 0
            const jRunning = ["splitting", "transcribing"].includes(j.status)
@@ -815,8 +816,8 @@ export function VideoFramePanelNode({ data }: any) {
  const duration = panelJob.duration ?? 0
  const frames = [...panelJob.frames].sort((a, b) => a.timestamp - b.timestamp)
  const aspect = panelJob.width && panelJob.height ? `${panelJob.width}/${panelJob.height}` : "9/16"
-  const panelTarget = d.frameTargets[panelJob.id] ?? "transparent_human"
-  const panelCount = d.frameCounts[panelJob.id] ?? 12
+  const panelTarget = d.frameTargets[panelJob.id] ?? "random_subject"
+  const panelCount = d.frameCounts[panelJob.id] ?? 6
  const panelQuality = d.frameQualities[panelJob.id] ?? "auto"
  const panelRunning = ["splitting", "transcribing"].includes(panelJob.status)
  const dockText: Record<CanvasPanelDock, string> = {
--- a/web/lib/api.ts
+++ b/web/lib/api.ts
@@ -417,7 +417,7 @@ export interface KeyFrame {
  generated_images?: GeneratedImage[]
 }

-export type FrameExtractTarget = "transparent_human" | "balanced" | "subject" | "transition" | "expression" | "motion"
+export type FrameExtractTarget = "random_subject" | "transparent_human" | "balanced" | "subject" | "transition" | "expression" | "motion"
 export type FrameExtractMode = "replace" | "append"
 export type FrameExtractQuality = "auto" | "fast" | "accurate" | "ultra"
 export type AssetBackground = "white" | "black"
@@ -691,7 +691,7 @@ export async function triggerTranscribe(id: string): Promise<Job> {

 export async function analyzeJob(
  id: string,
-  frames = 12,
+  frames = 6,
  target: FrameExtractTarget = "balanced",
  mode: FrameExtractMode = "replace",
  quality: FrameExtractQuality = "auto",