auto-save 2026-05-17 13:56 (~4)
This commit is contained in:
@@ -1,11 +1,5 @@
|
|||||||
{
|
{
|
||||||
"entries": [
|
"entries": [
|
||||||
{
|
|
||||||
"files_changed": 1,
|
|
||||||
"message": "Codex 会话活跃 · 最近命令:codex · 1 项未提交变更 · 最近提交:auto-save 2026-05-14 18:16 (~1)",
|
|
||||||
"ts": "2026-05-14T10:18:43Z",
|
|
||||||
"type": "session-heartbeat"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"files_changed": 1,
|
"files_changed": 1,
|
||||||
"hash": "38ca0f2",
|
"hash": "38ca0f2",
|
||||||
@@ -3269,6 +3263,13 @@
|
|||||||
"type": "session-heartbeat",
|
"type": "session-heartbeat",
|
||||||
"message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 2 项未提交变更 · 最近提交:fix: recover media intake and remove audio strip",
|
"message": "Codex 会话活跃 · 最近命令:codex · 分支 main · 2 项未提交变更 · 最近提交:fix: recover media intake and remove audio strip",
|
||||||
"files_changed": 2
|
"files_changed": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"ts": "2026-05-17T13:50:47+08:00",
|
||||||
|
"type": "commit",
|
||||||
|
"message": "auto-save 2026-05-17 13:50 (~2)",
|
||||||
|
"hash": "5a87149",
|
||||||
|
"files_changed": 2
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
5
RULES.md
5
RULES.md
@@ -52,8 +52,9 @@
|
|||||||
## 环境变量
|
## 环境变量
|
||||||
- `LLM_BASE_URL` / `LLM_API_KEY`:OpenAI 兼容网关,用于 ASR、翻译、文案改写、图像等模型调用
|
- `LLM_BASE_URL` / `LLM_API_KEY`:OpenAI 兼容网关,用于 ASR、翻译、文案改写、图像等模型调用
|
||||||
- `ASR_MODEL`:OpenAI Audio Transcriptions 音频转写模型,默认 `whisper-1`
|
- `ASR_MODEL`:OpenAI Audio Transcriptions 音频转写模型,默认 `whisper-1`
|
||||||
- `ASR_FALLBACK_MODEL`:当当前网关没有 `/audio/transcriptions` 时,用 Gemini 多模态 chat 直接识别 wav,默认 `gemini-2.5-flash`
|
- `ASR_FALLBACK_MODEL`:远端 ASR 和本机 ASR 都不可用时才尝试的多模态兜底,默认 `gemini-2.5-flash`;如果模型不能真实听到音频或返回疑似逐秒假字幕,后端必须拒绝写入时间轴
|
||||||
- `ASR_TIMEOUT_SECONDS`:ASR 单次请求超时,默认 45 秒;`whisper-1` 超时后自动走 `ASR_FALLBACK_MODEL`,避免第一步长时间停在转录中
|
- `ASR_TIMEOUT_SECONDS`:远端 ASR / 音频分析单次请求超时,默认 45 秒,避免第一步长时间停在转录中
|
||||||
|
- `LOCAL_ASR_BIN` / `LOCAL_ASR_MODEL` / `LOCAL_ASR_TIMEOUT_SECONDS`:本机 ASR 兜底,默认使用 `/opt/homebrew/bin/mlx_whisper` + `mlx-community/whisper-tiny`,用于当前 SKG 网关 `/audio/transcriptions` 不可用时生成真实逐句时间轴
|
||||||
- `TRANSLATE_MODEL`:字幕翻译模型,默认 `gemini-2.5-flash`
|
- `TRANSLATE_MODEL`:字幕翻译模型,默认 `gemini-2.5-flash`
|
||||||
- `REWRITE_MODEL`:通用改写/分镜描述模型,默认 `gemini-2.5-pro`
|
- `REWRITE_MODEL`:通用改写/分镜描述模型,默认 `gemini-2.5-pro`
|
||||||
- `AUDIO_REWRITE_MODEL`:后续音频口播改写模型,默认跟随 `REWRITE_MODEL`;当前第一步不默认调用口播改写,只保留原文案和声音分析
|
- `AUDIO_REWRITE_MODEL`:后续音频口播改写模型,默认跟随 `REWRITE_MODEL`;当前第一步不默认调用口播改写,只保留原文案和声音分析
|
||||||
|
|||||||
@@ -12,6 +12,10 @@ WEB_AUTH_COOKIE_SECURE=false
|
|||||||
# 模型分工
|
# 模型分工
|
||||||
ASR_MODEL=whisper-1
|
ASR_MODEL=whisper-1
|
||||||
ASR_FALLBACK_MODEL=gemini-2.5-flash
|
ASR_FALLBACK_MODEL=gemini-2.5-flash
|
||||||
|
ASR_TIMEOUT_SECONDS=45
|
||||||
|
LOCAL_ASR_BIN=/opt/homebrew/bin/mlx_whisper
|
||||||
|
LOCAL_ASR_MODEL=mlx-community/whisper-tiny
|
||||||
|
LOCAL_ASR_TIMEOUT_SECONDS=180
|
||||||
TRANSLATE_MODEL=gemini-2.5-flash
|
TRANSLATE_MODEL=gemini-2.5-flash
|
||||||
REWRITE_MODEL=gemini-2.5-pro
|
REWRITE_MODEL=gemini-2.5-pro
|
||||||
IMAGE_MODEL=gemini-3-pro-image-preview
|
IMAGE_MODEL=gemini-3-pro-image-preview
|
||||||
|
|||||||
@@ -1793,6 +1793,8 @@ def _validate_asr_segments(segments: list[dict], duration: float, source: str) -
|
|||||||
raise TranscriptionUnavailable(f"{source} returned timestamps outside audio duration")
|
raise TranscriptionUnavailable(f"{source} returned timestamps outside audio duration")
|
||||||
if duration > 10 and last_end < duration * 0.45 and words < 20:
|
if duration > 10 and last_end < duration * 0.45 and words < 20:
|
||||||
raise TranscriptionUnavailable(f"{source} returned too little transcript coverage")
|
raise TranscriptionUnavailable(f"{source} returned too little transcript coverage")
|
||||||
|
for item in clean:
|
||||||
|
item["_source"] = source
|
||||||
return clean
|
return clean
|
||||||
|
|
||||||
|
|
||||||
@@ -2064,6 +2066,7 @@ def _audio_profile_model_sync(wav: Path, segments: list[TranscriptSegment], targ
|
|||||||
response_format={"type": "json_object"},
|
response_format={"type": "json_object"},
|
||||||
temperature=0.1,
|
temperature=0.1,
|
||||||
max_tokens=900,
|
max_tokens=900,
|
||||||
|
timeout=ASR_TIMEOUT_SECONDS,
|
||||||
)
|
)
|
||||||
content = (resp.choices[0].message.content or "").strip()
|
content = (resp.choices[0].message.content or "").strip()
|
||||||
data = json.loads(content)
|
data = json.loads(content)
|
||||||
@@ -2322,6 +2325,7 @@ def pipeline_transcribe(job_id: str, manage_job_status: bool = True) -> None:
|
|||||||
segments = _transcribe_sync(wav)
|
segments = _transcribe_sync(wav)
|
||||||
if not segments:
|
if not segments:
|
||||||
raise TranscriptionUnavailable("ASR 未返回可用字幕段")
|
raise TranscriptionUnavailable("ASR 未返回可用字幕段")
|
||||||
|
asr_source = str(segments[0].get("_source") or ASR_MODEL)
|
||||||
|
|
||||||
# 先把英文段落落到 job 上(让 UI 提前看到,翻译再补 zh)
|
# 先把英文段落落到 job 上(让 UI 提前看到,翻译再补 zh)
|
||||||
en_only = [
|
en_only = [
|
||||||
@@ -2368,7 +2372,7 @@ def pipeline_transcribe(job_id: str, manage_job_status: bool = True) -> None:
|
|||||||
if manage_job_status:
|
if manage_job_status:
|
||||||
update(job, transcript=full, status="transcribed", progress=100,
|
update(job, transcript=full, status="transcribed", progress=100,
|
||||||
audio_script=audio_script,
|
audio_script=audio_script,
|
||||||
message=f"音频解析完成 · {len(full)} 段({ASR_MODEL} + {TRANSLATE_MODEL} + {ASR_FALLBACK_MODEL} 音频分析)")
|
message=f"音频解析完成 · {len(full)} 段({asr_source} + {TRANSLATE_MODEL} + {ASR_FALLBACK_MODEL} 音频分析)")
|
||||||
else:
|
else:
|
||||||
update(job, transcript=full, audio_script=audio_script)
|
update(job, transcript=full, audio_script=audio_script)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user