auto-save 2026-05-12 17:28 (~6)

2026-05-12 17:28:54 +08:00
parent e6b8615e3a
commit 6a9abeabc0
6 changed files with 210 additions and 22 deletions
--- a/.memory/worklog.json
+++ b/.memory/worklog.json
@@ -132,6 +132,13 @@
      "message": "auto-save 2026-05-12 17:17 (~1)",
      "hash": "4fd43e8",
      "files_changed": 1
    },
    {
      "ts": "2026-05-12T17:23:21+08:00",
      "type": "commit",
      "message": "auto-save 2026-05-12 17:23 (~2)",
      "hash": "e6b8615",
      "files_changed": 2
    }
  ]
 }
--- a/api/main.py
+++ b/api/main.py
@@ -132,6 +132,77 @@ def run(cmd: list[str], cwd: Path | None = None) -> str:
    return res.stdout
 # ---- 启发式选帧工具 ----
 import imagehash
 import numpy as np
 from PIL import Image
 def _sharpness(img_path: Path) -> float:
    """Return the Laplacian variance of an image as a sharpness score.

    The larger the value, the sharper the image; blurry or transition frames
    score low.

    Args:
        img_path: Path of the image file to score.

    Returns:
        Variance of the Laplacian response, as a Python float.
    """
    # Open inside a context manager so the file handle is released right away
    # instead of lingering until garbage collection.
    with Image.open(img_path) as im:
        # Grayscale at a fixed 320x180 size, so every frame is scored on the
        # same pixel grid.
        g = np.asarray(im.convert("L").resize((320, 180)), dtype=np.float32)
    # 4-neighbour Laplacian on interior pixels:
    # up + down + left + right - 4 * centre.
    lap = (-4 * g[1:-1, 1:-1]
           + g[:-2, 1:-1] + g[2:, 1:-1] + g[1:-1, :-2] + g[1:-1, 2:])
    return float(lap.var())
 def _select_keyframes(candidates: list[Path], n: int, dup_threshold: int = 8) -> list[Path]:
    """
    candidates: 按时间排序的候选帧路径
    n: 目标帧数
    dup_threshold: pHash 汉明距离 < 此值视为相似（默认 8，64bit hash 大致 ~12.5% 像素差）
    """
    if len(candidates) <= n:
        return candidates
    # 算 pHash + sharpness
    items = []
    for i, p in enumerate(candidates):
        try:
            img = Image.open(p)
            h = imagehash.phash(img)
            s = _sharpness(p)
            items.append({"path": p, "idx": i, "hash": h, "sharp": s})
        except Exception:
            continue
    # 去重：相似帧保留 sharpness 高的
    deduped: list[dict] = []
    for it in items:
        dup = None
        for kept in deduped:
            if (it["hash"] - kept["hash"]) < dup_threshold:
                dup = kept
                break
        if dup is None:
            deduped.append(it)
        elif it["sharp"] > dup["sharp"]:
            deduped[deduped.index(dup)] = it
    # 时序分桶：把候选时间轴等分 n 段，每段取去重后 sharpness 最高的
    total = len(candidates)
    buckets: list[list[dict]] = [[] for _ in range(n)]
    for it in deduped:
        b = min(int(it["idx"] * n / total), n - 1)
        buckets[b].append(it)
    selected: list[dict] = []
    for b in buckets:
        if b:
            selected.append(max(b, key=lambda x: x["sharp"]))
    # 空桶补足：从未选的 deduped 里按 sharpness 排序补
    chosen_paths = {it["path"] for it in selected}
    remaining = sorted([it for it in deduped if it["path"] not in chosen_paths],
                       key=lambda x: -x["sharp"])
    while len(selected) < n and remaining:
        selected.append(remaining.pop(0))
    # 按时间排序输出
    selected.sort(key=lambda x: x["idx"])
    return [it["path"] for it in selected]
 def ffprobe_meta(mp4: Path) -> dict:
    out = run([
        "ffprobe", "-v", "error", "-print_format", "json", "-show_streams", "-show_format", str(mp4),
@@ -194,37 +265,55 @@ async def pipeline_analyze(job_id: str, frame_count: int = KEYFRAME_COUNT) -> No
        ])
        n = max(1, min(int(frame_count), 20))
-        update(job, message=f"抽取 {n} 张关键帧（均匀采样）…", progress=50)
+        # 候选数：n 的 6 倍或至少 24，封顶 60
        candidate_count = max(24, min(60, n * 6))
        update(job, message=f"抽取候选 {candidate_count} 张…", progress=45)
        frames_dir = d / "frames"
        if frames_dir.exists():
            shutil.rmtree(frames_dir)
        frames_dir.mkdir(parents=True)
        cand_dir = d / "candidates"
        if cand_dir.exists():
            shutil.rmtree(cand_dir)
        cand_dir.mkdir(parents=True)
-        # 均匀采样：在 duration / (n+1) 的等距时间点各抽 1 帧
+        # 1) 均匀采样大批候选（fast seek，每张 < 0.5s）
        # 用 -ss 在 -i 前 = fast seek，每张 < 0.5s
        duration = max(float(job.duration or 1.0), 0.1)
-        step = duration / (n + 1)
+        step = duration / (candidate_count + 1)
-        for i in range(n):
+        candidate_meta: list[tuple[Path, float]] = []  # (path, timestamp)
        for i in range(candidate_count):
            t = step * (i + 1)
-            out = frames_dir / f"sample_{i:03d}.jpg"
+            out = cand_dir / f"c_{i:03d}.jpg"
            run([
-                "ffmpeg", "-y",
+                "ffmpeg", "-y", "-ss", str(t), "-i", str(mp4),
                "-ss", str(t),
                "-i", str(mp4),
                "-frames:v", "1",
-                "-pix_fmt", "yuvj420p",
+                "-pix_fmt", "yuvj420p", "-q:v", "3",
                "-q:v", "3",
                str(out),
            ])
            if out.exists():
                candidate_meta.append((out, t))
-        all_frames = sorted(frames_dir.glob("*.jpg"))[:n]
+        # 2) D 启发式选 n 张：pHash 去重 + Laplacian 清晰度 + 时序分桶
        update(job, message=f"启发式筛选 {n} / {len(candidate_meta)} 张…", progress=60)
        cand_paths = [m[0] for m in candidate_meta]
        ts_by_path = {m[0]: m[1] for m in candidate_meta}
        chosen = _select_keyframes(cand_paths, n)
        # 3) 落盘到 frames/<idx>.jpg
        renamed: list[KeyFrame] = []
-        for i, src in enumerate(all_frames):
+        chosen_sorted = sorted(chosen, key=lambda p: ts_by_path[p])
        for i, src in enumerate(chosen_sorted):
            dst = frames_dir / f"{i:03d}.jpg"
-            if src != dst:
+            shutil.copyfile(src, dst)
-                src.rename(dst)
+            renamed.append(KeyFrame(
-            ts = (job.duration or 0) * (i + 0.5) / max(len(all_frames), 1)
+                index=i,
-            renamed.append(KeyFrame(index=i, timestamp=round(ts, 2), url=f"/jobs/{job_id}/frames/{i}.jpg"))
+                timestamp=round(ts_by_path[src], 2),
                url=f"/jobs/{job_id}/frames/{i}.jpg",
            ))
        # 4) 清理候选目录
        shutil.rmtree(cand_dir, ignore_errors=True)
        update(
            job,
@@ -420,6 +509,43 @@ async def trigger_analyze(job_id: str, bg: BackgroundTasks, frames: int = KEYFRA
    return job
@app.post("/jobs/{job_id}/frames", response_model=Job)
 def add_manual_frame(job_id: str, t: float) -> Job:
    """从指定时间戳手动抽 1 帧追加到 job.frames"""
    job = JOBS.get(job_id)
    if not job:
        raise HTTPException(404, "job not found")
    if not job.video_url:
        raise HTTPException(400, "video not ready")
    d = job_dir(job_id)
    mp4 = d / "source.mp4"
    if not mp4.exists():
        raise HTTPException(400, "source.mp4 missing")
    frames_dir = d / "frames"
    frames_dir.mkdir(parents=True, exist_ok=True)
    # 新 index：max(existing)+1（即使列表已按 ts 排序，文件名用 index 保持稳定）
    next_idx = max((f.index for f in job.frames), default=-1) + 1
    out = frames_dir / f"{next_idx:03d}.jpg"
    try:
        run([
            "ffmpeg", "-y", "-ss", str(t), "-i", str(mp4),
            "-frames:v", "1", "-pix_fmt", "yuvj420p", "-q:v", "3",
            str(out),
        ])
    except RuntimeError as e:
        raise HTTPException(500, f"ffmpeg failed: {e}")
    new_frame = KeyFrame(
        index=next_idx,
        timestamp=round(float(t), 2),
        url=f"/jobs/{job_id}/frames/{next_idx}.jpg",
    )
    merged = sorted(list(job.frames) + [new_frame], key=lambda f: f.timestamp)
    update(job, frames=merged, message=f"已手动加帧（{t:.1f}s），共 {len(merged)} 张")
    return job
@app.get("/jobs/{job_id}", response_model=Job)
 def get_job(job_id: str) -> Job:
    job = JOBS.get(job_id)
--- a/api/requirements.txt
+++ b/api/requirements.txt
@@ -6,3 +6,6 @@ python-dotenv==1.0.1
 yt-dlp==2026.3.17
 openai==1.55.3
 httpx==0.27.2
 imagehash==4.3.1
 Pillow>=11.0
 numpy>=2.0
--- a/web/app/page.tsx
+++ b/web/app/page.tsx
@@ -13,7 +13,7 @@ import {
  type NodeData,
 } from "@/components/nodes"
 import { ThemeToggle } from "@/components/theme-toggle"
-import { analyzeJob, createJob, getJob, uploadJob, type Job } from "@/lib/api"
+import { addManualFrame, analyzeJob, createJob, getJob, uploadJob, type Job } from "@/lib/api"
 import { FrameLightbox } from "@/components/lightbox"
 const NODE_TYPES = {
@@ -113,6 +113,17 @@ export default function Home() {
    }
  }, [job?.id])
  // Ask the API for one extra frame at `t`, store the job it returns, and toast the outcome.
  const handleAddManualFrame = useCallback(async (t: number) => {
    if (job) {
      try {
        const refreshed = await addManualFrame(job.id, t)
        setJob(refreshed)
        const frameTotal = refreshed.frames.length
        toast.success(`已加帧 @ ${t.toFixed(1)}s · 共 ${frameTotal} 张`)
      } catch (err) {
        // Non-Error throwables are stringified so the toast always has text.
        const reason = err instanceof Error ? err.message : String(err)
        toast.error("加帧失败：" + reason)
      }
    }
  }, [job?.id])
  const handleToggleFrame = useCallback((idx: number) => {
    setSelectedFrames((prev) => {
      const next = new Set(prev)
@@ -174,7 +185,8 @@ export default function Home() {
    onAnalyze: handleAnalyze,
    onToggleFrame: handleToggleFrame,
    onExpandFrame: setExpandedFrame,
-  }), [job, submitting, analyzing, selectedFrames, handleSubmit, handleUpload, handleAnalyze, handleToggleFrame])
+    onAddManualFrame: handleAddManualFrame,
  }), [job, submitting, analyzing, selectedFrames, handleSubmit, handleUpload, handleAnalyze, handleToggleFrame, handleAddManualFrame])
  // 用 useNodesState 让 ReactFlow 自己管位置（避免轮询时重置 drag）
  const [nodes, setNodes, onNodesChange] = useNodesState<Node>(
--- a/web/components/nodes/index.tsx
+++ b/web/components/nodes/index.tsx
@@ -3,7 +3,7 @@ import { useRef, useState } from "react"
 import { type NodeProps } from "@xyflow/react"
 import {
  Link2, Upload, Download, Scissors, Image as ImageIcon,
-  Mic, Languages, FileEdit, Sparkles, Film, FileVideo, Loader2,
+  Mic, Languages, FileEdit, Sparkles, Film, FileVideo, Loader2, Plus,
 } from "lucide-react"
 import { NodeShell, type NodeStatus, type NodeKind } from "./node-shell"
 import { type Job, frameUrl, videoUrl } from "@/lib/api"
@@ -18,6 +18,7 @@ export interface NodeData {
  onAnalyze: () => void
  onToggleFrame: (idx: number) => void
  onExpandFrame: (idx: number) => void
  onAddManualFrame: (t: number) => void
 }
 /* ---- 状态映射工具 ---- */
@@ -60,7 +61,10 @@ function asrStatus(job: Job | null): NodeStatus {
 export function InputNode({ data, selected }: NodeProps<{ data: NodeData }> | any) {
  const d: NodeData = data
  const [url, setUrl] = useState("")
  const [videoT, setVideoT] = useState(0)
  const [addingFrame, setAddingFrame] = useState(false)
  const fileRef = useRef<HTMLInputElement>(null)
  const videoRef = useRef<HTMLVideoElement>(null)
  const job = d.job
  // 是否已下载 → 显示视频 + 解析按钮
@@ -68,6 +72,7 @@ export function InputNode({ data, selected }: NodeProps<{ data: NodeData }> | an
  const isDownloading = job?.status === "downloading" || job?.status === "created"
  const isAnalyzing = !!job && ["splitting", "frames_extracted", "transcribing"].includes(job.status)
  const isDone = job?.status === "transcribed"
  const hasFrames = (job?.frames.length ?? 0) > 0
  const inputLocked = isDownloading || d.submitting
  return (
@@ -127,8 +132,10 @@ export function InputNode({ data, selected }: NodeProps<{ data: NodeData }> | an
      {hasVideo && job && (
        <>
          <video
            ref={videoRef}
            src={videoUrl(job.id)}
            controls
            onTimeUpdate={(e) => setVideoT((e.target as HTMLVideoElement).currentTime)}
            className="w-full aspect-video rounded-md bg-black border border-black/10 dark:border-white/10"
          />
          <div className="mt-2 flex items-center justify-between text-[10.5px] font-mono text-[var(--text-faint)]">
@@ -137,17 +144,41 @@ export function InputNode({ data, selected }: NodeProps<{ data: NodeData }> | an
              {job.url.startsWith("upload://") ? `📎 ${job.url.slice(9)}` : "🔗"}
            </span>
          </div>
          {/* 手动拖加帧（已抽过帧才出现） */}
          {hasFrames && (
            <button
              type="button"
              disabled={addingFrame}
              onClick={async (e) => {
                e.stopPropagation()
                const t = videoRef.current?.currentTime ?? 0
                setAddingFrame(true)
                try {
                  await d.onAddManualFrame(t)
                } finally {
                  setAddingFrame(false)
                }
              }}
              className="mt-2 w-full text-[11.5px] py-2 rounded-md border border-dashed border-emerald-400/40 bg-emerald-400/5 hover:bg-emerald-400/10 text-emerald-300 dark:text-emerald-300 disabled:opacity-50 flex items-center justify-center gap-1.5"
              title="把视频当前播放时间点的画面加为新关键帧"
            >
              {addingFrame ? <Loader2 className="h-3.5 w-3.5 animate-spin" /> : <Plus className="h-3.5 w-3.5" />}
              {addingFrame ? "抽帧中…" : `+ 把 ${videoT.toFixed(1)}s 这一帧加为关键帧`}
            </button>
          )}
          <button
            type="button"
            disabled={isAnalyzing || d.analyzing}
            onClick={d.onAnalyze}
            className={`mt-2 w-full text-[14px] py-3 rounded-md bg-gradient-to-r from-indigo-500 to-violet-500 text-white hover:opacity-95 disabled:opacity-40 disabled:cursor-not-allowed flex items-center justify-center gap-2 font-semibold shadow-lg shadow-violet-500/30 ${
-              !isAnalyzing && !d.analyzing && !isDone ? "animate-[pulse_2s_ease-in-out_infinite] ring-2 ring-violet-400/40 ring-offset-2 ring-offset-transparent" : ""
+              !isAnalyzing && !d.analyzing && !isDone && !hasFrames ? "animate-[pulse_2s_ease-in-out_infinite] ring-2 ring-violet-400/40 ring-offset-2 ring-offset-transparent" : ""
            }`}
          >
            {(isAnalyzing || d.analyzing) ? (
              <><Loader2 className="h-4 w-4 animate-spin" /> 解析中…</>
-            ) : isDone ? (
+            ) : isDone || hasFrames ? (
              "重新解析"
            ) : (
              <>▶ 点这里开始解析</>
--- a/web/lib/api.ts
+++ b/web/lib/api.ts
@@ -84,6 +84,15 @@ export async function analyzeJob(id: string, frames = 5): Promise<Job> {
  return res.json()
 }
 export async function addManualFrame(id: string, t: number): Promise<Job> {
  // POST /jobs/{id}/frames?t=<t with two decimals>; resolves to the parsed JSON response.
  const query = encodeURIComponent(t.toFixed(2))
  const response = await fetch(`${API_BASE}/jobs/${id}/frames?t=${query}`, { method: "POST" })
  if (response.ok) return response.json()
  // Non-OK status: report the code plus at most 200 characters of the body ("" if unreadable).
  const body = await response.text().catch(() => "")
  throw new Error(`addFrame ${response.status} ${body.slice(0, 200)}`)
 }
 export function frameUrl(jobId: string, frameIndex: number): string {
  return `${API_BASE}/jobs/${jobId}/frames/${frameIndex}.jpg`
 }