From 2de32523be30dcb3d4cc509f5eddceadab2764ec Mon Sep 17 00:00:00 2001 From: kang Date: Thu, 14 May 2026 11:36:32 +0800 Subject: [PATCH] auto-save 2026-05-14 11:36 (~3) --- .memory/worklog.json | 27 +++++++++++++-------------- api/main.py | 12 +++++++----- docs/source-analysis.html | 2 +- 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/.memory/worklog.json b/.memory/worklog.json index cfca66d..2e49b9d 100644 --- a/.memory/worklog.json +++ b/.memory/worklog.json @@ -1,19 +1,5 @@ { "entries": [ - { - "files_changed": 1, - "hash": "029b895", - "message": "auto-save 2026-05-13 03:30 (~1)", - "ts": "2026-05-13T03:30:47+08:00", - "type": "commit" - }, - { - "files_changed": 1, - "hash": "fc9b90c", - "message": "auto-save 2026-05-13 03:36 (~1)", - "ts": "2026-05-13T03:36:41+08:00", - "type": "commit" - }, { "files_changed": 1, "hash": "7158468", @@ -3301,6 +3287,19 @@ "type": "session-heartbeat", "message": "Codex 会话活跃 · 最近命令:codex · 1 项未提交变更 · 最近提交:auto-save 2026-05-14 11:21 (~7)", "files_changed": 1 + }, + { + "ts": "2026-05-14T11:30:57+08:00", + "type": "commit", + "message": "auto-save 2026-05-14 11:30 (~1)", + "hash": "6f72994", + "files_changed": 1 + }, + { + "ts": "2026-05-14T03:36:10Z", + "type": "session-heartbeat", + "message": "Codex 会话活跃 · 最近命令:codex · 3 项未提交变更 · 最近提交:auto-save 2026-05-14 11:30 (~1)", + "files_changed": 3 } ] } diff --git a/api/main.py b/api/main.py index c9b859b..3abc6ac 100644 --- a/api/main.py +++ b/api/main.py @@ -1104,10 +1104,11 @@ def _score_transparent_human_frame(img_path: Path) -> TransparentHumanFrameScore max_tokens=1200, ) raw = (resp.choices[0].message.content or "").strip() - if raw.startswith("```"): - import re as _re - match = _re.search(r"\{[\s\S]*\}", raw) - raw = match.group(0) if match else raw + if not raw: + raw = (getattr(resp.choices[0].message, "reasoning_content", "") or "").strip() + import re as _re + match = _re.search(r"\{[\s\S]*\}", raw) + raw = match.group(0) if match else raw data = 
json.loads(raw) except Exception as e: return TransparentHumanFrameScore(qualified=False, reject_reason=f"AI 评分失败:{e}") @@ -1611,7 +1612,8 @@ def _build_audio_script_sync(job_id: str, segments: list[TranscriptSegment]) -> voice_url = _minimax_tts_sync(job_id, rewritten) except Exception as e: voice_error = str(e) - errors = ";".join(x for x in [rewrite_error, voice_error] if x) + # 改写失败时已有本地 SKG 模板兜底,不把它标成用户可见错误;配音失败才需要提示。 + errors = voice_error return AudioScript( status="completed", source_text=source_text, diff --git a/docs/source-analysis.html b/docs/source-analysis.html index c8e4027..503ab88 100644 --- a/docs/source-analysis.html +++ b/docs/source-analysis.html @@ -950,7 +950,7 @@ SubjectAsset {

问题:等待抽帧完成后自动启动音频,不符合“先把声音文案拿出来审核”的工作流;用户需要在音频卡片上直接触发。

-

改动:移除前端抽帧完成后的自动转写逻辑;AudioNode 保留并固定显示“提取音频 / 重新提取音频”按钮。后端 /transcribe 不再要求 frames_extracted,视频就绪后可直接从 source.mp4 拆出 audio.wav,并继续 ASR、翻译、SKG 改写和 MiniMax 配音;抽帧中触发时不抢主状态,而是用 audio_script.status 表示音频处理中。当当前网关的 whisper-1 audio endpoint 返回 404 时,会 fallback 到 Gemini 多模态音频识别。

+

改动:移除前端抽帧完成后的自动转写逻辑;AudioNode 保留并固定显示“提取音频 / 重新提取音频”按钮。后端 /transcribe 不再要求 frames_extracted,视频就绪后可直接从 source.mp4 拆出 audio.wav,并继续 ASR、翻译、SKG 改写和 MiniMax 配音;抽帧中触发时不抢主状态,而是用 audio_script.status 表示音频处理中。当当前网关的 whisper-1 audio endpoint 返回 404 时,会 fallback 到 Gemini 多模态音频识别;改写模型解析失败时使用本地 SKG 口播模板兜底,不再把可用文案标成前端错误。

影响:web/app/page.tsx、web/components/nodes/index.tsx、api/main.py、docs/source-analysis.html