auto-save 2026-05-14 11:21 (~7)
This commit is contained in:
104
api/main.py
104
api/main.py
@@ -33,6 +33,7 @@ PRODUCT_LIBRARY_MANIFEST = PRODUCT_LIBRARY_DIR / "manifest.json"
|
||||
# Gateway connection settings, read once at import time. Surrounding
# whitespace is stripped; an empty string means the variable is unset.
LLM_BASE_URL = os.getenv("LLM_BASE_URL", "").strip()
LLM_API_KEY = os.getenv("LLM_API_KEY", "").strip()

# Model names. Every entry is stripped and falls back to its default when the
# environment variable is missing or blank, so a stray `ASR_MODEL=` line in an
# env file cannot produce an empty model name.
ASR_MODEL = os.getenv("ASR_MODEL", "").strip() or "whisper-1"
ASR_FALLBACK_MODEL = os.getenv("ASR_FALLBACK_MODEL", "").strip() or "gemini-2.5-flash"
TRANSLATE_MODEL = os.getenv("TRANSLATE_MODEL", "").strip() or "gemini-2.5-flash"
REWRITE_MODEL = os.getenv("REWRITE_MODEL", "").strip() or "gemini-2.5-pro"
VISION_MODEL = os.getenv("VISION_MODEL", "").strip() or "gemini-2.5-flash"
|
||||
@@ -687,8 +688,8 @@ def _resolve_frame_quality(duration: float, quality: FrameExtractQuality) -> Fra
|
||||
cores = os.cpu_count() or 4
|
||||
memory_gb = _physical_memory_gb()
|
||||
strong_machine = cores >= 10 and (memory_gb == 0.0 or memory_gb >= 32)
|
||||
if strong_machine and duration <= 180:
|
||||
return "ultra"
|
||||
# 展示/演示时不能把本机资源打满:auto 最高只到 accurate。
|
||||
# ultra 保留为手动选择项,不再由 auto 自动命中。
|
||||
if strong_machine and duration <= 600:
|
||||
return "accurate"
|
||||
if cores >= 8 and duration <= 240:
|
||||
@@ -1157,6 +1158,16 @@ def ffprobe_meta(mp4: Path) -> dict:
|
||||
return json.loads(out)
|
||||
|
||||
|
||||
def media_duration(path: Path) -> float:
    """Return the duration of *path* in seconds, or 0.0 when it cannot be read.

    Invokes ``ffprobe`` through the module's ``run`` helper with JSON output and
    reads ``format.duration`` from the result. This is best-effort: any failure
    (probe error, unparsable output, missing or non-numeric duration) yields
    0.0 instead of raising.
    """
    try:
        probe_cmd = [
            "ffprobe", "-v", "error", "-print_format", "json", "-show_format", str(path),
        ]
        probe = json.loads(run(probe_cmd))
        container = probe.get("format", {})
        seconds = container.get("duration") or 0
        return float(seconds)
    except Exception:
        return 0.0
|
||||
|
||||
|
||||
def pipeline_download(job_id: str) -> None:
|
||||
"""阶段 1:仅下载(或上传跳过),落 source.mp4,停在 downloaded 等用户点解析/提取音频。"""
|
||||
job = JOBS[job_id]
|
||||
@@ -1362,21 +1373,83 @@ def analyze_queue_worker() -> None:
|
||||
|
||||
# ---------- 音频转写 + 翻译 + SKG 改写 + MiniMax 配音 ----------
|
||||
|
||||
def _parse_asr_segments(content: str, duration: float) -> list[dict]:
|
||||
raw = (content or "").strip()
|
||||
if raw.startswith("```"):
|
||||
import re as _re
|
||||
match = _re.search(r"(\[[\s\S]*\]|\{[\s\S]*\})", raw)
|
||||
raw = match.group(0) if match else raw
|
||||
try:
|
||||
data = json.loads(raw)
|
||||
except json.JSONDecodeError:
|
||||
text = raw.strip()
|
||||
return [{"start": 0.0, "end": duration, "text": text}] if text else []
|
||||
if isinstance(data, dict):
|
||||
for key in ("segments", "data", "items", "result"):
|
||||
if isinstance(data.get(key), list):
|
||||
data = data[key]
|
||||
break
|
||||
else:
|
||||
text = str(data.get("text") or data.get("transcript") or "").strip()
|
||||
return [{"start": 0.0, "end": duration, "text": text}] if text else []
|
||||
if not isinstance(data, list):
|
||||
return []
|
||||
segments: list[dict] = []
|
||||
for i, item in enumerate(data):
|
||||
if isinstance(item, str):
|
||||
text = item.strip()
|
||||
start = 0.0 if len(data) == 1 else duration * i / max(1, len(data))
|
||||
end = duration if len(data) == 1 else duration * (i + 1) / max(1, len(data))
|
||||
elif isinstance(item, dict):
|
||||
text = str(item.get("text") or item.get("en") or item.get("transcript") or "").strip()
|
||||
start = float(item.get("start") or item.get("start_time") or 0)
|
||||
end = float(item.get("end") or item.get("end_time") or duration)
|
||||
else:
|
||||
continue
|
||||
if text:
|
||||
segments.append({"start": max(0.0, start), "end": max(start, end), "text": text})
|
||||
return segments
|
||||
|
||||
|
||||
def _transcribe_gemini_sync(wav: Path) -> list[dict]:
    """Transcribe *wav* through the chat-completions endpoint using ASR_FALLBACK_MODEL.

    The whole file is read, base64-encoded and attached as an ``input_audio``
    part next to a text prompt asking for a strict-JSON segment list. The reply
    text is handed to ``_parse_asr_segments`` together with the probed clip
    duration, which the prompt also offers as the span for a single segment.
    """
    clip_seconds = media_duration(wav)
    encoded_audio = base64.b64encode(wav.read_bytes()).decode("ascii")

    instructions = (
        "Transcribe the attached audio. Return strict JSON only, no markdown. "
        "Schema: [{\"start\": 0.0, \"end\": 1.2, \"text\": \"English transcript\"}]. "
        "Use English for the transcript. If exact timestamps are uncertain, return one segment "
        f"from 0 to {clip_seconds:.2f} seconds."
    )
    user_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": instructions},
            {"type": "input_audio", "input_audio": {"data": encoded_audio, "format": "wav"}},
        ],
    }

    completion = llm().chat.completions.create(
        model=ASR_FALLBACK_MODEL,
        messages=[user_message],
        temperature=0,
    )
    reply = completion.choices[0].message.content or ""
    return _parse_asr_segments(reply.strip(), clip_seconds)
|
||||
|
||||
|
||||
def _transcribe_sync(wav: Path) -> list[dict]:
    """Transcribe *wav* into ``[{start, end, text}]`` segments.

    Primary path: the audio-transcriptions endpoint with ASR_MODEL, requesting
    ``verbose_json`` with segment-level timestamps. If the response carries no
    segments but does carry text, the full text is returned as one segment.

    Fallback path: any exception raised on the primary path (request failure or
    an unexpected response shape) hands the file to ``_transcribe_gemini_sync``.
    Exceptions raised by the fallback itself propagate to the caller.
    """
    try:
        with wav.open("rb") as f:
            resp = llm().audio.transcriptions.create(
                file=(wav.name, f, "audio/wav"),
                model=ASR_MODEL,
                response_format="verbose_json",
                timestamp_granularities=["segment"],
            )
        raw = resp.model_dump() if hasattr(resp, "model_dump") else resp
        segments = raw.get("segments") or []
        # Safety net: if the gateway returns no segments, treat the full text as one segment.
        if not segments and raw.get("text"):
            segments = [{"start": 0.0, "end": float(raw.get("duration", 0) or 0), "text": raw["text"]}]
        return segments
    except Exception:
        # Deliberately broad: whatever breaks the primary ASR call, try the fallback model.
        return _transcribe_gemini_sync(wav)
|
||||
|
||||
|
||||
def _translate_sync(segments: list[dict]) -> list[str]:
|
||||
@@ -1865,6 +1938,7 @@ def health() -> dict:
|
||||
"base_url": LLM_BASE_URL or "openai-default",
|
||||
"models": {
|
||||
"asr": ASR_MODEL,
|
||||
"asr_fallback": ASR_FALLBACK_MODEL,
|
||||
"translate": TRANSLATE_MODEL,
|
||||
"rewrite": REWRITE_MODEL,
|
||||
"audio_rewrite": AUDIO_REWRITE_MODEL,
|
||||
|
||||
Reference in New Issue
Block a user