auto-save 2026-05-14 04:04 (~6)

This commit is contained in:
2026-05-14 04:04:54 +08:00
parent b95706a3e4
commit 87f1182afe
6 changed files with 171 additions and 65 deletions

View File

@@ -89,6 +89,7 @@ JobStatus = Literal[
KEYFRAME_COUNT = int(os.getenv("KEYFRAME_COUNT", "5"))
FrameExtractTarget = Literal["balanced", "subject", "transition", "expression", "motion"]
FrameExtractMode = Literal["replace", "append"]
FRAME_TARGET_LABELS: dict[FrameExtractTarget, str] = {
"balanced": "综合关键帧",
"subject": "清晰主体",
@@ -582,6 +583,7 @@ async def pipeline_analyze(
job_id: str,
frame_count: int = KEYFRAME_COUNT,
target: FrameExtractTarget = "balanced",
mode: FrameExtractMode = "replace",
) -> None:
"""阶段 2拆音轨 + 抽关键帧。ASR/翻译是独立文案轨,不阻塞视觉素材流。"""
job = JOBS[job_id]
@@ -591,13 +593,16 @@ async def pipeline_analyze(
if not mp4.exists():
raise RuntimeError("source.mp4 不存在,先完成下载")
update(job, status="splitting", message="ffmpeg 拆分音轨…", progress=35)
wav = d / "audio.wav"
run([
"ffmpeg", "-y", "-i", str(mp4),
"-vn", "-ac", "1", "-ar", "16000", "-c:a", "pcm_s16le",
str(wav),
])
if wav.exists():
update(job, status="splitting", message="复用音轨 · 准备抽帧…", progress=35)
else:
update(job, status="splitting", message="ffmpeg 拆分音轨…", progress=35)
run([
"ffmpeg", "-y", "-i", str(mp4),
"-vn", "-ac", "1", "-ar", "16000", "-c:a", "pcm_s16le",
str(wav),
])
n = max(1, min(int(frame_count), 20))
target_label = FRAME_TARGET_LABELS.get(target, FRAME_TARGET_LABELS["balanced"])
@@ -607,7 +612,9 @@ async def pipeline_analyze(
update(job, message=f"低清扫描候选 · {target_label} · 约 {estimated_scan_count} 帧…", progress=45)
frames_dir = d / "frames"
if frames_dir.exists():
replacing = mode == "replace"
existing_frames = list(job.frames) if not replacing else []
if replacing and frames_dir.exists():
shutil.rmtree(frames_dir)
frames_dir.mkdir(parents=True)
scan_dir = d / "frame_scan"
@@ -637,15 +644,23 @@ async def pipeline_analyze(
raise RuntimeError("候选帧评分失败")
# 2) 目标化筛选pHash 去重 + 清晰度 / 中心细节 / 转场变化 / 动作强度 + 时序分桶。
selection_count = n if replacing else min(len(candidates), max(n * 4, n + len(existing_frames) + 2))
update(job, message=f"{target_label}筛选 {n} / {len(candidates)} 张…", progress=60)
chosen = _select_keyframes(candidates, n, target)
chosen = _select_keyframes(candidates, selection_count, target)
# 3) 只对最终选中的时间点,从原视频抽高质量关键帧。
renamed: list[KeyFrame] = []
chosen_sorted = sorted(chosen, key=lambda it: float(it["timestamp"]))
for i, item in enumerate(chosen_sorted):
dst = frames_dir / f"{i:03d}.jpg"
existing_timestamps = [float(f.timestamp) for f in existing_frames]
next_idx = max((int(f.index) for f in existing_frames), default=-1) + 1
for item in chosen_sorted:
if len(renamed) >= n:
break
t = float(item["timestamp"])
if not replacing and any(abs(t - old) < 0.35 for old in existing_timestamps):
continue
idx = next_idx + len(renamed)
dst = frames_dir / f"{idx:03d}.jpg"
run([
"ffmpeg", "-y", "-ss", f"{t:.3f}", "-i", str(mp4),
"-frames:v", "1",
@@ -653,20 +668,24 @@ async def pipeline_analyze(
str(dst),
])
renamed.append(KeyFrame(
index=i,
index=idx,
timestamp=round(t, 2),
url=f"/jobs/{job_id}/frames/{i}.jpg",
url=f"/jobs/{job_id}/frames/{idx}.jpg",
))
existing_timestamps.append(t)
# 4) 清理扫描目录
shutil.rmtree(scan_dir, ignore_errors=True)
merged_frames = sorted(existing_frames + renamed, key=lambda f: f.timestamp)
action_label = "追加" if not replacing else "抽取"
update(
job,
status="frames_extracted",
frames=renamed,
frames=merged_frames,
progress=70,
message=f"已按「{target_label}抽取 {len(renamed)} 张关键帧 · 可继续清洗 / 提取元素 / 分镜编排",
message=f"已按「{target_label}{action_label} {len(renamed)} 张关键帧 · {len(merged_frames)}",
)
except Exception as e:
@@ -1040,13 +1059,14 @@ async def trigger_analyze(
bg: BackgroundTasks,
frames: int = KEYFRAME_COUNT,
target: FrameExtractTarget = "balanced",
mode: FrameExtractMode = "replace",
) -> Job:
job = JOBS.get(job_id)
if not job:
raise HTTPException(404, "job not found")
if job.status not in {"downloaded", "frames_extracted", "transcribed", "failed"}:
raise HTTPException(409, f"status must be downloaded/failed, got {job.status}")
bg.add_task(pipeline_analyze, job_id, frames, target)
bg.add_task(pipeline_analyze, job_id, frames, target, mode)
return job