From 6eb1f98e06f2879194f7a3a813cec98cec9e48b9 Mon Sep 17 00:00:00 2001
From: kang <wankang2050@gmail.com>
Date: Thu, 14 May 2026 03:53:51 +0800
Subject: [PATCH] auto-save 2026-05-14 03:53 (~5)

---
 .memory/worklog.json           |  19 +++
 api/main.py                    | 226 ++++++++++++++++++++++++---------
 web/app/page.tsx               |  20 ++-
 web/components/nodes/index.tsx |  30 ++++-
 web/lib/api.ts                 |   7 +-
 5 files changed, 234 insertions(+), 68 deletions(-)

diff --git a/.memory/worklog.json b/.memory/worklog.json
index dbdc836..0896281 100644
--- a/.memory/worklog.json
+++ b/.memory/worklog.json
@@ -3102,6 +3102,25 @@
       "type": "session-heartbeat",
       "message": "Claude 会话活跃 · 最近命令：claude · 1 项未提交变更 · 最近提交：auto-save 2026-05-14 03:42 (~3)",
       "files_changed": 1
+    },
+    {
+      "ts": "2026-05-14T03:48:20+08:00",
+      "type": "commit",
+      "message": "auto-save 2026-05-14 03:48 (~1)",
+      "hash": "9572111",
+      "files_changed": 1
+    },
+    {
+      "ts": "2026-05-13T19:48:49Z",
+      "type": "session-heartbeat",
+      "message": "Codex 会话活跃 · 最近命令：codex · 1 项未提交变更 · 最近提交：auto-save 2026-05-14 03:48 (~1)",
+      "files_changed": 1
+    },
+    {
+      "ts": "2026-05-13T19:53:12Z",
+      "type": "session-heartbeat",
+      "message": "Claude 会话活跃 · 最近命令：claude · 5 项未提交变更 · 最近提交：auto-save 2026-05-14 03:48 (~1)",
+      "files_changed": 5
     }
   ]
 }
diff --git a/api/main.py b/api/main.py
index 230ae5f..5ce7858 100644
--- a/api/main.py
+++ b/api/main.py
@@ -88,6 +88,14 @@ JobStatus = Literal[
 ]
 
 KEYFRAME_COUNT = int(os.getenv("KEYFRAME_COUNT", "5"))
+FrameExtractTarget = Literal["balanced", "subject", "transition", "expression", "motion"]  # accepted keyframe-extraction targets
+FRAME_TARGET_LABELS: dict[FrameExtractTarget, str] = {  # label per target, interpolated into job progress messages
+    "balanced": "综合关键帧",  # balanced keyframes
+    "subject": "清晰主体",  # sharp subject
+    "transition": "转场变化",  # scene transitions
+    "expression": "表情瞬间",  # expression moments
+    "motion": "动作峰值",  # motion peaks
+}
 
 
 class GeneratedImage(BaseModel):
@@ -383,37 +391,115 @@ import numpy as np
 from PIL import Image, ImageEnhance, ImageFilter, ImageOps
 
 
-def _sharpness(img_path: Path) -> float:
+def _sharpness_from_gray(g: np.ndarray) -> float:
     """Laplacian variance：值越大越清晰，模糊/转场帧值低。"""
-    g = np.asarray(Image.open(img_path).convert("L").resize((320, 180)), dtype=np.float32)
     lap = (-4 * g[1:-1, 1:-1]
            + g[:-2, 1:-1] + g[2:, 1:-1] + g[1:-1, :-2] + g[1:-1, 2:])
     return float(lap.var())
 
 
-def _select_keyframes(candidates: list[Path], n: int, dup_threshold: int = 8) -> list[Path]:
+def _frame_metrics(img_path: Path, idx: int, timestamp: float) -> dict | None:
+    """Compute local scoring features for one low-res candidate frame (None if it cannot be read). Used only for ranking; final frames are still extracted from the original video."""
+    try:
+        with Image.open(img_path) as raw:
+            img = raw.convert("RGB")
+            h = imagehash.phash(img)  # perceptual hash, compared later for near-duplicate detection
+            small = img.resize((160, 90))  # PIL size is (width, height): 160 wide x 90 tall
+    except Exception:  # unreadable / undecodable frame: the caller drops None results
+        return None
+
+    arr = np.asarray(small, dtype=np.float32)
+    # Rec. 601 luma, kept in the 0-255 range so it reads alongside the sharpness / contrast thresholds.
+    gray = (0.299 * arr[:, :, 0] + 0.587 * arr[:, :, 1] + 0.114 * arr[:, :, 2]).astype(np.float32)
+    center = gray[22:68, 40:120]  # central crop: rows 22-67, cols 40-119 of the 90x160 luma plane
+    rg = arr[:, :, 0] - arr[:, :, 1]  # red-green opponent channel
+    yb = 0.5 * (arr[:, :, 0] + arr[:, :, 1]) - arr[:, :, 2]  # yellow-blue opponent channel
+    colorfulness = float(np.sqrt(rg.var() + yb.var()) + 0.3 * np.sqrt(rg.mean() ** 2 + yb.mean() ** 2))  # Hasler-Suesstrunk-style colourfulness
+    return {
+        "path": img_path,
+        "idx": idx,
+        "timestamp": timestamp,
+        "hash": h,
+        "gray": gray,  # kept so neighbouring frames can be differenced later
+        "sharp": _sharpness_from_gray(gray),  # whole-frame Laplacian variance
+        "center_sharp": _sharpness_from_gray(center),  # Laplacian variance of the central crop only
+        "brightness": float(gray.mean()),
+        "contrast": float(gray.std()),
+        "colorfulness": colorfulness,
+        "scene_score": 0.0,  # placeholder, filled in by _attach_temporal_metrics
+        "motion": 0.0,  # placeholder, filled in by _attach_temporal_metrics
+    }
+
+
+def _attach_temporal_metrics(items: list[dict]) -> None:
+    """Attach neighbour-frame differences in place; the transition / motion targets rely on them, so no per-frame high-resolution scan is needed."""
+    for i, it in enumerate(items):
+        prev_delta = 0.0  # stays 0 for the first item (no previous frame)
+        next_delta = 0.0  # stays 0 for the last item (no next frame)
+        if i > 0:
+            prev_delta = float(np.mean(np.abs(it["gray"] - items[i - 1]["gray"])) / 255.0)  # mean abs luma diff, scaled to 0-1
+        if i + 1 < len(items):
+            next_delta = float(np.mean(np.abs(items[i + 1]["gray"] - it["gray"])) / 255.0)  # same measure against the next frame
+        it["scene_score"] = max(prev_delta, next_delta)  # larger of the two neighbour deltas
+        it["motion"] = (prev_delta + next_delta) / 2.0  # average of the two; a missing neighbour counts as 0
+
+
+def _normalize_item_metrics(items: list[dict]) -> None:  # adds a "<key>_n" field per metric to every item, in place
+    for key in ("sharp", "center_sharp", "contrast", "colorfulness", "scene_score", "motion"):
+        vals = [float(it.get(key, 0.0)) for it in items if float(it.get(key, 0.0)) > 0]  # only positive values feed the percentile
+        cap = float(np.percentile(vals, 95)) if vals else 1.0  # scale = 95th percentile; 1.0 when no positive values exist
+        if cap <= 0:
+            cap = 1.0  # guard against dividing by zero / a negative scale
+        for it in items:
+            it[f"{key}_n"] = min(float(it.get(key, 0.0)) / cap, 1.0)  # values above the cap clamp to 1.0
+
+
+def _target_score(item: dict, target: FrameExtractTarget) -> float:  # weighted blend of the normalized "*_n" metrics for one target
+    sharp = float(item.get("sharp_n", 0.0))
+    center = float(item.get("center_sharp_n", 0.0))
+    contrast = float(item.get("contrast_n", 0.0))
+    color = float(item.get("colorfulness_n", 0.0))
+    scene = float(item.get("scene_score_n", 0.0))
+    motion = float(item.get("motion_n", 0.0))
+
+    if target == "subject":  # the weights in every branch below sum to 1.0
+        score = center * 0.48 + sharp * 0.25 + contrast * 0.17 + color * 0.10
+    elif target == "transition":
+        score = scene * 0.55 + sharp * 0.28 + contrast * 0.12 + color * 0.05
+    elif target == "expression":
+        # Without an extra vision model, expression / animal moments can only be approximated locally via centre detail + sharpness + slight motion change.
+        score = center * 0.40 + sharp * 0.24 + motion * 0.18 + contrast * 0.12 + color * 0.06
+    elif target == "motion":
+        score = motion * 0.45 + sharp * 0.30 + center * 0.15 + contrast * 0.10
+    else:  # "balanced", and any unrecognized target value
+        score = sharp * 0.45 + scene * 0.22 + center * 0.15 + contrast * 0.12 + color * 0.06
+
+    brightness = float(item.get("brightness", 0.0))  # raw 0-255 mean luma, not normalized
+    raw_contrast = float(item.get("contrast", 0.0))  # raw luma standard deviation, not normalized
+    if raw_contrast < 4 or brightness < 8 or brightness > 247:  # near-flat, near-black or near-white frame: heavy penalty
+        return score * 0.15
+    if raw_contrast < 9:  # low-contrast frame: milder penalty
+        return score * 0.65
+    return score
+
+
+def _select_keyframes(candidates: list[dict], n: int, target: FrameExtractTarget, dup_threshold: int = 8) -> list[dict]:
     """
-    candidates: 按时间排序的候选帧路径
+    candidates: 按时间排序的低清候选帧评分项
     n: 目标帧数
     dup_threshold: pHash 汉明距离 < 此值视为相似（默认 8，64bit hash 大致 ~12.5% 像素差）
     """
     if len(candidates) <= n:
         return candidates
 
-    # 算 pHash + sharpness
-    items = []
-    for i, p in enumerate(candidates):
-        try:
-            img = Image.open(p)
-            h = imagehash.phash(img)
-            s = _sharpness(p)
-            items.append({"path": p, "idx": i, "hash": h, "sharp": s})
-        except Exception:
-            continue
+    _attach_temporal_metrics(candidates)
+    _normalize_item_metrics(candidates)
+    for it in candidates:
+        it["score"] = _target_score(it, target)
 
-    # 去重：相似帧保留 sharpness 高的
+    # 去重：相似帧保留当前目标下分数更高的
     deduped: list[dict] = []
-    for it in items:
+    for it in candidates:
         dup = None
         for kept in deduped:
             if (it["hash"] - kept["hash"]) < dup_threshold:
@@ -421,10 +507,10 @@ def _select_keyframes(candidates: list[Path], n: int, dup_threshold: int = 8) ->
                 break
         if dup is None:
             deduped.append(it)
-        elif it["sharp"] > dup["sharp"]:
+        elif it["score"] > dup["score"]:
             deduped[deduped.index(dup)] = it
 
-    # 时序分桶：把候选时间轴等分 n 段，每段取去重后 sharpness 最高的
+    # 时序分桶：把候选时间轴等分 n 段，每段取当前目标下最优的
     total = len(candidates)
     buckets: list[list[dict]] = [[] for _ in range(n)]
     for it in deduped:
@@ -434,18 +520,18 @@ def _select_keyframes(candidates: list[Path], n: int, dup_threshold: int = 8) ->
     selected: list[dict] = []
     for b in buckets:
         if b:
-            selected.append(max(b, key=lambda x: x["sharp"]))
+            selected.append(max(b, key=lambda x: x["score"]))
 
-    # 空桶补足：从未选的 deduped 里按 sharpness 排序补
+    # 空桶补足：从未选的 deduped 里按目标分数补
     chosen_paths = {it["path"] for it in selected}
     remaining = sorted([it for it in deduped if it["path"] not in chosen_paths],
-                       key=lambda x: -x["sharp"])
+                       key=lambda x: -x["score"])
     while len(selected) < n and remaining:
         selected.append(remaining.pop(0))
 
     # 按时间排序输出
     selected.sort(key=lambda x: x["idx"])
-    return [it["path"] for it in selected]
+    return selected
 
 
 def ffprobe_meta(mp4: Path) -> dict:
@@ -492,7 +578,11 @@ async def pipeline_download(job_id: str) -> None:
         update(job, status="failed", error=str(e), message="下载失败")
 
 
-async def pipeline_analyze(job_id: str, frame_count: int = KEYFRAME_COUNT) -> None:
+async def pipeline_analyze(
+    job_id: str,
+    frame_count: int = KEYFRAME_COUNT,
+    target: FrameExtractTarget = "balanced",
+) -> None:
     """阶段 2：拆音轨 + 抽关键帧。ASR/翻译是独立文案轨，不阻塞视觉素材流。"""
     job = JOBS[job_id]
     d = job_dir(job_id)
@@ -510,62 +600,73 @@ async def pipeline_analyze(job_id: str, frame_count: int = KEYFRAME_COUNT) -> No
         ])
 
         n = max(1, min(int(frame_count), 20))
-        # 候选数：n 的 6 倍或至少 24，封顶 60
-        candidate_count = max(24, min(60, n * 6))
+        target_label = FRAME_TARGET_LABELS.get(target, FRAME_TARGET_LABELS["balanced"])
+        duration = max(float(job.duration or 1.0), 0.1)
+        scan_fps = min(2.0, max(0.02, 180.0 / duration))
+        estimated_scan_count = max(1, int(duration * scan_fps))
 
-        update(job, message=f"抽取候选 {candidate_count} 张…", progress=45)
+        update(job, message=f"低清扫描候选 · {target_label} · 约 {estimated_scan_count} 帧…", progress=45)
         frames_dir = d / "frames"
         if frames_dir.exists():
             shutil.rmtree(frames_dir)
         frames_dir.mkdir(parents=True)
-        cand_dir = d / "candidates"
-        if cand_dir.exists():
-            shutil.rmtree(cand_dir)
-        cand_dir.mkdir(parents=True)
+        scan_dir = d / "frame_scan"
+        if scan_dir.exists():
+            shutil.rmtree(scan_dir)
+        scan_dir.mkdir(parents=True)
 
-        # 1) 均匀采样大批候选（fast seek，每张 < 0.5s）
-        duration = max(float(job.duration or 1.0), 0.1)
-        step = duration / (candidate_count + 1)
-        candidate_meta: list[tuple[Path, float]] = []  # (path, timestamp)
-        for i in range(candidate_count):
-            t = step * (i + 1)
-            out = cand_dir / f"c_{i:03d}.jpg"
+        # 1) 低分辨率、低帧率扫描。扫描图只用于候选评分，最终不直接作为关键帧。
+        run([
+            "ffmpeg", "-y", "-i", str(mp4),
+            "-vf", f"fps={scan_fps:.4f},scale=360:-2",
+            "-q:v", "4",
+            str(scan_dir / "s_%05d.jpg"),
+        ])
+
+        scan_paths = sorted(scan_dir.glob("s_*.jpg"))
+        if not scan_paths:
+            raise RuntimeError("低清扫描没有生成候选帧")
+
+        candidates: list[dict] = []
+        for i, p in enumerate(scan_paths):
+            t = min(i / scan_fps, max(duration - 0.05, 0.0))
+            item = _frame_metrics(p, i, t)
+            if item:
+                candidates.append(item)
+        if not candidates:
+            raise RuntimeError("候选帧评分失败")
+
+        # 2) 目标化筛选：pHash 去重 + 清晰度 / 中心细节 / 转场变化 / 动作强度 + 时序分桶。
+        update(job, message=f"{target_label}筛选 {n} / {len(candidates)} 张…", progress=60)
+        chosen = _select_keyframes(candidates, n, target)
+
+        # 3) 只对最终选中的时间点，从原视频抽高质量关键帧。
+        renamed: list[KeyFrame] = []
+        chosen_sorted = sorted(chosen, key=lambda it: float(it["timestamp"]))
+        for i, item in enumerate(chosen_sorted):
+            dst = frames_dir / f"{i:03d}.jpg"
+            t = float(item["timestamp"])
             run([
-                "ffmpeg", "-y", "-ss", str(t), "-i", str(mp4),
+                "ffmpeg", "-y", "-ss", f"{t:.3f}", "-i", str(mp4),
                 "-frames:v", "1",
                 "-pix_fmt", "yuvj420p", "-q:v", "3",
-                str(out),
+                str(dst),
             ])
-            if out.exists():
-                candidate_meta.append((out, t))
-
-        # 2) D 启发式选 n 张：pHash 去重 + Laplacian 清晰度 + 时序分桶
-        update(job, message=f"启发式筛选 {n} / {len(candidate_meta)} 张…", progress=60)
-        cand_paths = [m[0] for m in candidate_meta]
-        ts_by_path = {m[0]: m[1] for m in candidate_meta}
-        chosen = _select_keyframes(cand_paths, n)
-
-        # 3) 落盘到 frames/<idx>.jpg
-        renamed: list[KeyFrame] = []
-        chosen_sorted = sorted(chosen, key=lambda p: ts_by_path[p])
-        for i, src in enumerate(chosen_sorted):
-            dst = frames_dir / f"{i:03d}.jpg"
-            shutil.copyfile(src, dst)
             renamed.append(KeyFrame(
                 index=i,
-                timestamp=round(ts_by_path[src], 2),
+                timestamp=round(t, 2),
                 url=f"/jobs/{job_id}/frames/{i}.jpg",
             ))
 
-        # 4) 清理候选目录
-        shutil.rmtree(cand_dir, ignore_errors=True)
+        # 4) 清理扫描目录
+        shutil.rmtree(scan_dir, ignore_errors=True)
 
         update(
             job,
             status="frames_extracted",
             frames=renamed,
             progress=70,
-            message=f"已抽取 {len(renamed)} 张关键帧 · 可继续清洗 / 提取元素 / 分镜编排",
+            message=f"已按「{target_label}」抽取 {len(renamed)} 张关键帧 · 可继续清洗 / 提取元素 / 分镜编排",
         )
 
     except Exception as e:
@@ -934,13 +1035,18 @@ async def create_job_from_upload(bg: BackgroundTasks, file: UploadFile = File(..
 
 
 @app.post("/jobs/{job_id}/analyze", response_model=Job)
-async def trigger_analyze(job_id: str, bg: BackgroundTasks, frames: int = KEYFRAME_COUNT) -> Job:
+async def trigger_analyze(
+    job_id: str,
+    bg: BackgroundTasks,
+    frames: int = KEYFRAME_COUNT,
+    target: FrameExtractTarget = "balanced",
+) -> Job:
     job = JOBS.get(job_id)
     if not job:
         raise HTTPException(404, "job not found")
     if job.status not in {"downloaded", "frames_extracted", "transcribed", "failed"}:
         raise HTTPException(409, f"status must be downloaded/failed, got {job.status}")
-    bg.add_task(pipeline_analyze, job_id, frames)
+    bg.add_task(pipeline_analyze, job_id, frames, target)
     return job
 
 
diff --git a/web/app/page.tsx b/web/app/page.tsx
index 0df53d5..aa2f22e 100644
--- a/web/app/page.tsx
+++ b/web/app/page.tsx
@@ -19,7 +19,7 @@ import { ThemeToggle } from "@/components/theme-toggle"
 import {
   addManualFrame, analyzeJob, createJob, getJob, listJobs, uploadJob, deleteJob, deleteFrame, deleteGeneratedImage,
   deleteGeneratedVideo, deleteCutout, generateStoryboardVideo,
-  type Job, type ImageRef, type StoryboardScene,
+  type Job, type ImageRef, type StoryboardScene, type FrameExtractTarget,
 } from "@/lib/api"
 
 const NODE_TYPES = {
@@ -34,6 +34,13 @@ const NODE_TYPES = {
 const KEYFRAME_PANEL_ID = "keyframe-detail-panel"
 const VIDEO_FRAME_PANEL_ID = "video-frame-panel"
 const FLOATING_PANEL_IDS = new Set([KEYFRAME_PANEL_ID, VIDEO_FRAME_PANEL_ID])
+const FRAME_TARGET_LABELS: Record<FrameExtractTarget, string> = { // same keys and labels as FRAME_TARGET_LABELS in api/main.py
+  balanced: "综合关键帧", // balanced keyframes
+  subject: "清晰主体", // sharp subject
+  transition: "转场变化", // scene transitions
+  expression: "表情瞬间", // expression moments
+  motion: "动作峰值", // motion peaks
+}
 
 // 合并 input + download + split 为一个节点
 // 分叉：上路 input → visual lab ↘
@@ -85,6 +92,7 @@ export default function Home() {
   const job = useMemo(() => jobs.find((j) => j.id === activeJobId) ?? null, [jobs, activeJobId])
   const [submitting, setSubmitting] = useState(false)
   const [analyzing, setAnalyzing] = useState(false)
+  const [frameTarget, setFrameTarget] = useState<FrameExtractTarget>("balanced")
   const [selectedFrames, setSelectedFrames] = useState<Set<number>>(new Set())
   const [expandedFrame, setExpandedFrame] = useState<number | null>(null)
   const [framePanelScale, setFramePanelScale] = useState(1)
@@ -163,16 +171,16 @@ export default function Home() {
     setAnalyzing(true)
     setSelectedFrames(new Set())
     try {
-      await analyzeJob(job.id, 5)
-      toast.info("开始解析：拆轨 → 抽帧。声音文案轨单独处理")
+      await analyzeJob(job.id, 5, frameTarget)
+      toast.info(`开始解析：拆轨 → ${FRAME_TARGET_LABELS[frameTarget]}抽帧。声音文案轨单独处理`)
       // 乐观更新本地状态，让轮询 useEffect 重新启动
-      setJob((prev) => prev ? { ...prev, status: "splitting", message: "拆轨中…", progress: 30 } : prev)
+      setJob((prev) => prev ? { ...prev, status: "splitting", message: `拆轨中 · ${FRAME_TARGET_LABELS[frameTarget]}…`, progress: 30 } : prev)
     } catch (e) {
       toast.error("解析触发失败：" + (e instanceof Error ? e.message : String(e)))
     } finally {
       setAnalyzing(false)
     }
-  }, [job?.id])
+  }, [job?.id, frameTarget])
 
   const handleAddManualFrameForJob = useCallback(async (jobId: string, t: number) => {
     try {
@@ -496,6 +504,7 @@ export default function Home() {
     activeJobId,
     submitting,
     analyzing,
+    frameTarget,
     selectedFrames,
     expandedFrame,
     framePanelScale,
@@ -507,6 +516,7 @@ export default function Home() {
     onSubmitUrl: handleSubmit,
     onUploadFile: handleUpload,
     onAnalyze: handleAnalyze,
+    onFrameTargetChange: setFrameTarget,
     onToggleFrame: handleToggleFrame,
     onExpandFrame: setExpandedFrame,
     onOpenFramePanel: handleOpenFramePanel,
diff --git a/web/components/nodes/index.tsx b/web/components/nodes/index.tsx
index caeff2f..2ca7ffb 100644
--- a/web/components/nodes/index.tsx
+++ b/web/components/nodes/index.tsx
@@ -16,7 +16,7 @@ import { toast } from "sonner"
 import { NodeShell, type NodeStatus, type NodeKind } from "./node-shell"
 import { HoverPreview } from "./hover-preview"
 import {
-  type Job, type ImageRef,
+  type Job, type ImageRef, type FrameExtractTarget,
   apiAssetUrl, effectiveFrameUrl, videoUrl, hasCutout, representativeCutoutUrl,
 } from "@/lib/api"
 import { FrameLightbox } from "@/components/lightbox"
@@ -29,6 +29,7 @@ export interface NodeData {
   activeJobId: string | null
   submitting: boolean
   analyzing: boolean
+  frameTarget: FrameExtractTarget
   selectedFrames: Set<number>
   expandedFrame: number | null
   framePanelScale?: number
@@ -40,6 +41,7 @@ export interface NodeData {
   onSubmitUrl: (url: string) => void
   onUploadFile: (file: File) => void
   onAnalyze: () => void
+  onFrameTargetChange: (target: FrameExtractTarget) => void
   onToggleFrame: (idx: number) => void
   onExpandFrame: (idx: number) => void
   onOpenFramePanel?: (idx: number) => void  // 打开/找回画布内关键帧详情面板
@@ -119,6 +121,13 @@ function clamp(value: number, min: number, max: number) {
 
 const THUMBNAIL_HEIGHT = 176
 const FLOATING_PANEL_EDGE_INSET = 8
+const FRAME_TARGET_OPTIONS: Array<{ value: FrameExtractTarget; label: string; hint: string }> = [ // entries of the extraction-target <select>; hint renders under it
+  { value: "balanced", label: "综合关键帧", hint: "清晰、去重、变化、时间覆盖" }, // balanced: sharp, de-duplicated, varied, spread over time
+  { value: "subject", label: "清晰主体", hint: "人物 / 产品主体更清楚" }, // subject: person / product subject is clearer
+  { value: "transition", label: "转场变化", hint: "切镜和画面变化优先" }, // transition: cuts and scene changes first
+  { value: "expression", label: "表情瞬间", hint: "人物 / 动物表情倾向" }, // expression: leans toward person / animal expressions
+  { value: "motion", label: "动作峰值", hint: "动作变化更明显" }, // motion: more pronounced movement
+]
 
 function canvasThumbnailAnchor(root: HTMLDivElement | null, target: HTMLElement) {
   if (!root) return { x: 160, y: 0 }
@@ -423,6 +432,7 @@ export function InputNode({ data, selected }: NodeProps<{ data: NodeData }> | an
   const isDone = job?.status === "transcribed"
   const hasFrames = (job?.frames.length ?? 0) > 0
   const inputLocked = isDownloading || d.submitting
+  const activeFrameTarget = FRAME_TARGET_OPTIONS.find((option) => option.value === d.frameTarget) ?? FRAME_TARGET_OPTIONS[0]
 
   return (
     <div ref={rootRef} className="relative" style={{ width: "100%", height: "100%" }}>
@@ -607,6 +617,24 @@ export function InputNode({ data, selected }: NodeProps<{ data: NodeData }> | an
             <span className="text-[var(--text-faint)]">{job.url.startsWith("upload://") ? "📎 上传" : "🔗 链接"}</span>
           </div>
 
+          <label className="mt-2 block rounded-md border border-black/10 bg-white/55 px-2.5 py-2 text-[11px] dark:border-white/10 dark:bg-white/[0.06]">
+            <div className="flex items-center gap-2">
+              <span className="shrink-0 font-semibold text-[var(--text-strong)]">抽帧目标</span>
+              <select
+                value={d.frameTarget}
+                disabled={isAnalyzing || d.analyzing}
+                onChange={(e) => d.onFrameTargetChange(e.target.value as FrameExtractTarget)}
+                className="min-w-0 flex-1 cursor-pointer rounded-md border border-black/10 bg-white/80 px-2 py-1.5 text-[11px] font-medium text-[var(--text-strong)] outline-none transition focus:ring-2 focus:ring-[var(--ring)] disabled:cursor-not-allowed disabled:opacity-50 dark:border-white/10 dark:bg-black/35"
+                aria-label="选择自动抽帧目标"
+              >
+                {FRAME_TARGET_OPTIONS.map((option) => (
+                  <option key={option.value} value={option.value}>{option.label}</option>
+                ))}
+              </select>
+            </div>
+            <div className="mt-1 truncate text-[10px] text-[var(--text-faint)]">{activeFrameTarget.hint}</div>
+          </label>
+
           <button
             type="button"
             disabled={isAnalyzing || d.analyzing}
diff --git a/web/lib/api.ts b/web/lib/api.ts
index 0ef54af..8c9e8de 100644
--- a/web/lib/api.ts
+++ b/web/lib/api.ts
@@ -128,6 +128,8 @@ export interface KeyFrame {
   generated_images?: GeneratedImage[]
 }
 
+export type FrameExtractTarget = "balanced" | "subject" | "transition" | "expression" | "motion" // same values as the FrameExtractTarget Literal in api/main.py
+
 export interface TranscriptSegment {
   index: number
   start: number
@@ -259,8 +261,9 @@ export async function triggerTranscribe(id: string): Promise<Job> {
   return res.json()
 }
 
-export async function analyzeJob(id: string, frames = 5): Promise<Job> {
-  const res = await fetch(`${API_BASE}/jobs/${id}/analyze?frames=${frames}`, { method: "POST" })
+export async function analyzeJob(id: string, frames = 5, target: FrameExtractTarget = "balanced"): Promise<Job> {
+  const qs = new URLSearchParams({ frames: String(frames), target })
+  const res = await fetch(`${API_BASE}/jobs/${id}/analyze?${qs.toString()}`, { method: "POST" })
   if (!res.ok) {
     const t = await res.text().catch(() => "")
     throw new Error(`analyze ${res.status} ${t.slice(0, 200)}`)