auto-save 2026-05-14 10:25 (~13)
This commit is contained in:
@@ -1,12 +1,5 @@
|
|||||||
{
|
{
|
||||||
"entries": [
|
"entries": [
|
||||||
{
|
|
||||||
"files_changed": 1,
|
|
||||||
"hash": "ffba726",
|
|
||||||
"message": "auto-save 2026-05-13 01:01 (~1)",
|
|
||||||
"ts": "2026-05-13T01:01:52+08:00",
|
|
||||||
"type": "commit"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"files_changed": 1,
|
"files_changed": 1,
|
||||||
"hash": "f2d817d",
|
"hash": "f2d817d",
|
||||||
@@ -3315,6 +3308,13 @@
|
|||||||
"type": "session-heartbeat",
|
"type": "session-heartbeat",
|
||||||
"message": "Codex 会话活跃 · 最近命令:codex · 2 项未提交变更 · 最近提交:auto-save 2026-05-14 10:14 (~7)",
|
"message": "Codex 会话活跃 · 最近命令:codex · 2 项未提交变更 · 最近提交:auto-save 2026-05-14 10:14 (~7)",
|
||||||
"files_changed": 2
|
"files_changed": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"ts": "2026-05-14T10:20:16+08:00",
|
||||||
|
"type": "commit",
|
||||||
|
"message": "auto-save 2026-05-14 10:20 (~7)",
|
||||||
|
"hash": "be1ae80",
|
||||||
|
"files_changed": 7
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,15 @@
|
|||||||
{
|
{
|
||||||
"company" : "SKG",
|
"company" : "SKG",
|
||||||
"created" : "2026-05-12",
|
"created" : "2026-05-12",
|
||||||
"description" : "SKG AI 素材生产管线第二条思路验证:TK 链接 → 拆轨 → 关键帧≤10 + Gemini ASR\/翻译 → 接产品信息改写文案 → nano-banana-pro\/GPT Image 生图 → Seedance\/Kling\/Veo3 多模型生视频 → 合成带文案成品",
|
"credentials" : [
|
||||||
|
{
|
||||||
|
"description" : "MiniMax T2A 配音 API Key,本地开发只放 api/.env 的 MINIMAX_API_KEY,不入库",
|
||||||
|
"name" : "MINIMAX_API_KEY",
|
||||||
|
"storage" : "api/.env",
|
||||||
|
"type" : "api_key"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"description" : "SKG AI 素材生产管线第二条思路验证:TK 链接 → 拆轨 → 目标化关键帧 + ASR\/翻译 → 接 SKG 产品信息改写口播 → MiniMax 配音 → nano-banana-pro\/GPT Image 生图 → Seedance\/Kling\/Veo3 多模型生视频 → 合成带文案成品",
|
||||||
"kind" : "app",
|
"kind" : "app",
|
||||||
"name" : "SKG AI 素材管线 - TK 二创验证",
|
"name" : "SKG AI 素材管线 - TK 二创验证",
|
||||||
"ownership" : "company",
|
"ownership" : "company",
|
||||||
@@ -20,7 +28,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"stack" : [
|
"stack" : [
|
||||||
"Next.js + Python(yt-dlp\/ffmpeg) + Gemini + nano-banana-pro\/GPT Image + Seedance\/Kling\/Veo3"
|
"Next.js + Python(yt-dlp\/ffmpeg) + OpenAI-compatible LLM + MiniMax T2A + nano-banana-pro\/GPT Image + Seedance\/Kling\/Veo3"
|
||||||
],
|
],
|
||||||
"status" : "active",
|
"status" : "active",
|
||||||
"urls" : [
|
"urls" : [
|
||||||
|
|||||||
10
RULES.md
10
RULES.md
@@ -32,7 +32,15 @@
|
|||||||
- 部署完成后,`RULES.md` 和 `.project.json` 必须同一次任务一起更新
|
- 部署完成后,`RULES.md` 和 `.project.json` 必须同一次任务一起更新
|
||||||
|
|
||||||
## 环境变量
|
## 环境变量
|
||||||
- 待补充
|
- `LLM_BASE_URL` / `LLM_API_KEY`:OpenAI 兼容网关,用于 ASR、翻译、文案改写、图像等模型调用
|
||||||
|
- `ASR_MODEL`:音频转写模型,默认 `whisper-1`
|
||||||
|
- `TRANSLATE_MODEL`:字幕翻译模型,默认 `gemini-2.5-flash`
|
||||||
|
- `REWRITE_MODEL`:通用改写/分镜描述模型,默认 `gemini-2.5-pro`
|
||||||
|
- `AUDIO_REWRITE_MODEL`:音频口播改写模型,默认跟随 `REWRITE_MODEL`
|
||||||
|
- `AUDIO_PRODUCT_BRIEF`:音频口播改写时注入的 SKG 产品卖点
|
||||||
|
- `MINIMAX_API_KEY`:MiniMax T2A 配音 Key,只能放本地 `api/.env`,不能入库
|
||||||
|
- `MINIMAX_TTS_BASE_URL` / `MINIMAX_TTS_MODEL` / `MINIMAX_TTS_VOICE_ID`:MiniMax 配音端点、模型和音色配置
|
||||||
|
- `POE_API_KEY` / `VIDEO_API_KEY`:视频生成通道 Key,只能放本地环境变量
|
||||||
|
|
||||||
## 规则
|
## 规则
|
||||||
- 不允许编造不存在的部署域名、账号、密码
|
- 不允许编造不存在的部署域名、账号、密码
|
||||||
|
|||||||
@@ -14,11 +14,11 @@ VIDEO_MODEL_VEO3=veo-3.1-fast
|
|||||||
|
|
||||||
# 音频文案改写 + MiniMax 配音
|
# 音频文案改写 + MiniMax 配音
|
||||||
AUDIO_REWRITE_MODEL=gemini-2.5-pro
|
AUDIO_REWRITE_MODEL=gemini-2.5-pro
|
||||||
AUDIO_PRODUCT_BRIEF=SKG 智能按摩产品,主打日常肩颈、腰背、眼部、膝盖或足部放松;广告表达要高级、干净、可信,不做医疗疗效承诺。
|
AUDIO_PRODUCT_BRIEF="SKG 智能按摩产品,主打日常肩颈、腰背、眼部、膝盖或足部放松;广告表达要高级、干净、可信,不做医疗疗效承诺。"
|
||||||
MINIMAX_API_KEY=
|
MINIMAX_API_KEY=
|
||||||
MINIMAX_TTS_BASE_URL=https://api.minimax.io
|
MINIMAX_TTS_BASE_URL=https://api.minimax.io
|
||||||
MINIMAX_TTS_MODEL=speech-2.8-turbo
|
MINIMAX_TTS_MODEL=speech-2.8-turbo
|
||||||
MINIMAX_TTS_VOICE_ID=Chinese (Mandarin)_Reliable_Executive
|
MINIMAX_TTS_VOICE_ID="Chinese (Mandarin)_Reliable_Executive"
|
||||||
|
|
||||||
# Poe 视频 API(优先用于 Seedance / Kling / Veo)
|
# Poe 视频 API(优先用于 Seedance / Kling / Veo)
|
||||||
POE_API_BASE_URL=https://api.poe.com/v1
|
POE_API_BASE_URL=https://api.poe.com/v1
|
||||||
@@ -41,6 +41,7 @@ VIDEO_CONTENT_PATH=/videos/{id}/content
|
|||||||
VIDEO_DURATION_FIELD=seconds
|
VIDEO_DURATION_FIELD=seconds
|
||||||
|
|
||||||
# 工作目录
|
# 工作目录
|
||||||
|
KEYFRAME_COUNT=12
|
||||||
JOBS_DIR=./jobs
|
JOBS_DIR=./jobs
|
||||||
|
|
||||||
# CORS
|
# CORS
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ cd api
|
|||||||
python3 -m venv .venv
|
python3 -m venv .venv
|
||||||
source .venv/bin/activate
|
source .venv/bin/activate
|
||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
cp .env.example .env # 按需填 GEMINI_API_KEY
|
cp .env.example .env # 按需填 LLM_API_KEY / MINIMAX_API_KEY
|
||||||
uvicorn main:app --port 4291 --reload
|
uvicorn main:app --port 4291 --reload
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -104,7 +104,7 @@ JobStatus = Literal[
|
|||||||
"transcribing", "transcribed", "failed",
|
"transcribing", "transcribed", "failed",
|
||||||
]
|
]
|
||||||
|
|
||||||
KEYFRAME_COUNT = int(os.getenv("KEYFRAME_COUNT", "5"))
|
KEYFRAME_COUNT = int(os.getenv("KEYFRAME_COUNT", "12"))
|
||||||
FrameExtractTarget = Literal["transparent_human", "balanced", "subject", "transition", "expression", "motion"]
|
FrameExtractTarget = Literal["transparent_human", "balanced", "subject", "transition", "expression", "motion"]
|
||||||
FrameExtractMode = Literal["replace", "append"]
|
FrameExtractMode = Literal["replace", "append"]
|
||||||
FrameExtractQuality = Literal["auto", "fast", "accurate", "ultra"]
|
FrameExtractQuality = Literal["auto", "fast", "accurate", "ultra"]
|
||||||
@@ -1327,7 +1327,7 @@ async def analyze_queue_worker() -> None:
|
|||||||
ANALYZE_WORKER_RUNNING = False
|
ANALYZE_WORKER_RUNNING = False
|
||||||
|
|
||||||
|
|
||||||
# ---------- Gemini ASR + 翻译 ----------
|
# ---------- 音频转写 + 翻译 + SKG 改写 + MiniMax 配音 ----------
|
||||||
|
|
||||||
def _transcribe_sync(wav: Path) -> list[dict]:
|
def _transcribe_sync(wav: Path) -> list[dict]:
|
||||||
"""whisper-1 verbose_json → segments[{start, end, text}]"""
|
"""whisper-1 verbose_json → segments[{start, end, text}]"""
|
||||||
@@ -1347,7 +1347,7 @@ def _transcribe_sync(wav: Path) -> list[dict]:
|
|||||||
|
|
||||||
|
|
||||||
def _translate_sync(segments: list[dict]) -> list[str]:
|
def _translate_sync(segments: list[dict]) -> list[str]:
|
||||||
"""gemini-2.5-flash 批量翻译为中文,按段返回"""
|
"""批量翻译为中文,按段返回"""
|
||||||
payload = [{"i": i, "en": s.get("text", "").strip()} for i, s in enumerate(segments)]
|
payload = [{"i": i, "en": s.get("text", "").strip()} for i, s in enumerate(segments)]
|
||||||
prompt = (
|
prompt = (
|
||||||
"你是字幕翻译。把下列英文字幕段翻译为简体中文,保持原意、口语化、自然流畅。"
|
"你是字幕翻译。把下列英文字幕段翻译为简体中文,保持原意、口语化、自然流畅。"
|
||||||
|
|||||||
@@ -552,13 +552,14 @@
|
|||||||
<p>当前产品不是“复制别人的视频”,而是拆解参考视频,提取可借鉴的镜头元素,再改造成 SKG 产品语境的视频素材。</p>
|
<p>当前产品不是“复制别人的视频”,而是拆解参考视频,提取可借鉴的镜头元素,再改造成 SKG 产品语境的视频素材。</p>
|
||||||
<div class="pipeline">
|
<div class="pipeline">
|
||||||
<div class="step"><div class="num">1</div><h3>输入</h3><p>TK 链接或本地上传,后端下载/保存源视频。</p></div>
|
<div class="step"><div class="num">1</div><h3>输入</h3><p>TK 链接或本地上传,后端下载/保存源视频。</p></div>
|
||||||
<div class="step"><div class="num">2</div><h3>镜头拆解</h3><p>拆轨、抽关键帧、手动加帧,形成参考分镜池。当前主题默认使用“透明骨架人”抽帧目标:本地先扫候选,Vision 再按透明身体、白色骨架、人物占比、清晰度、广告感和产品可用性打分验收;不合格候选会自动换下一帧。</p></div>
|
<div class="step"><div class="num">2</div><h3>镜头拆解</h3><p>拆轨、抽关键帧、手动加帧,形成参考分镜池。当前主题默认直接抽 12 帧,并使用“透明骨架人”抽帧目标:本地先扫候选,Vision 再按透明身体、白色骨架、人物占比、清晰度、广告感和产品可用性打分验收;不合格候选会自动换下一帧。</p></div>
|
||||||
<div class="step"><div class="num">3</div><h3>清洗水印</h3><p>对关键帧做全图或区域清洗,清洗版先进入待审核状态;确认后可单张替换,也可一键替换全部待应用清洗版。</p></div>
|
<div class="step"><div class="num">3</div><h3>清洗水印</h3><p>对关键帧做全图或区域清洗,清洗版先进入待审核状态;确认后可单张替换,也可一键替换全部待应用清洗版。</p></div>
|
||||||
<div class="step"><div class="num">4</div><h3>主体识别</h3><p>识别场景和主体候选,只是候选,不应锁死。</p></div>
|
<div class="step"><div class="num">4</div><h3>主体识别</h3><p>识别场景和主体候选,只是候选,不应锁死。</p></div>
|
||||||
<div class="step"><div class="num">5</div><h3>素材准备</h3><p>清洗关键帧,把多张关键帧作为同一主体的参考,先重绘六张标准站立主体资产图,再按关键帧生成多个去主体、相似或换风格场景图。</p></div>
|
<div class="step"><div class="num">5</div><h3>素材准备</h3><p>清洗关键帧,把多张关键帧作为同一主体的参考,先重绘六张标准站立主体资产图,再按关键帧生成多个去主体、相似或换风格场景图。</p></div>
|
||||||
<div class="step"><div class="num">6</div><h3>分镜改造</h3><p>把参考主体、场景、动作和 SKG 产品放入分镜结构;产品融合使用纵向 6 行镜头工作表,每行绑定产品图、白底人物图、产品区域、场景图、描述词、秒数和单条生成入口。</p></div>
|
<div class="step"><div class="num">6</div><h3>分镜改造</h3><p>把参考主体、场景、动作和 SKG 产品放入分镜结构;产品融合使用纵向 6 行镜头工作表,每行绑定产品图、白底人物图、产品区域、场景图、描述词、秒数和单条生成入口。</p></div>
|
||||||
<div class="step"><div class="num">7</div><h3>生成视频</h3><p>普通分镜可调用 Seedance / Kling / Veo 3;产品融合固定用 GPT Image 2 生成位置引导图,再用 Seedance 按秒数生成视频,结果回写到画面工作台节点。</p></div>
|
<div class="step"><div class="num">7</div><h3>生成视频</h3><p>普通分镜可调用 Seedance / Kling / Veo 3;产品融合固定用 GPT Image 2 生成位置引导图,再用 Seedance 按秒数生成视频,结果回写到画面工作台节点。</p></div>
|
||||||
<div class="step"><div class="num">8</div><h3>合成成品</h3><p>片段、字幕、配音、转场合成最终 mp4。当前未实现。</p></div>
|
<div class="step"><div class="num">8</div><h3>声音文案</h3><p>音频轨独立处理:ASR 提取原始文案、翻译成中文、接 SKG 产品卖点改写成口播稿;配置 MiniMax 后直接生成配音 mp3。</p></div>
|
||||||
|
<div class="step"><div class="num">9</div><h3>合成成品</h3><p>片段、字幕、配音、转场合成最终 mp4。当前未实现。</p></div>
|
||||||
</div>
|
</div>
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
@@ -583,7 +584,7 @@
|
|||||||
<h3>后端核心</h3>
|
<h3>后端核心</h3>
|
||||||
<table>
|
<table>
|
||||||
<tbody>
|
<tbody>
|
||||||
<tr><td><code>api/main.py</code></td><td>FastAPI 单文件后端:状态模型、任务恢复、下载、抽帧、Vision、清洗、元素、分镜、文件返回。</td></tr>
|
<tr><td><code>api/main.py</code></td><td>FastAPI 单文件后端:状态模型、任务恢复、下载、抽帧、Vision、清洗、元素、分镜、音频文案改写、MiniMax 配音、文件返回。</td></tr>
|
||||||
<tr><td><code>api/product_library/skg-products</code></td><td>内置 SKG 白底产品图库:<code>manifest.json</code> 记录从桌面产品图筛出的 gallery 白底图,<code>images/</code> 存 41 张压缩后的参考图。</td></tr>
|
<tr><td><code>api/product_library/skg-products</code></td><td>内置 SKG 白底产品图库:<code>manifest.json</code> 记录从桌面产品图筛出的 gallery 白底图,<code>images/</code> 存 41 张压缩后的参考图。</td></tr>
|
||||||
<tr><td><code>jobs/<jobId>/state.json</code></td><td>运行时状态文件,不在源码列表里,但刷新恢复依赖它。</td></tr>
|
<tr><td><code>jobs/<jobId>/state.json</code></td><td>运行时状态文件,不在源码列表里,但刷新恢复依赖它。</td></tr>
|
||||||
<tr><td><code>jobs/<jobId>/frames</code></td><td>关键帧 jpg。注意 frame.index 是稳定 ID,不等于数组下标。</td></tr>
|
<tr><td><code>jobs/<jobId>/frames</code></td><td>关键帧 jpg。注意 frame.index 是稳定 ID,不等于数组下标。</td></tr>
|
||||||
@@ -605,8 +606,8 @@ web/app/page.tsx
|
|||||||
|
|
||||||
后端主链路:
|
后端主链路:
|
||||||
api/main.py
|
api/main.py
|
||||||
-> Job / KeyFrame / KeyElement / StoryboardScene
|
-> Job / KeyFrame / KeyElement / StoryboardScene / AudioScript
|
||||||
-> 下载 / 上传 / 抽帧 / Vision / 清洗 / 元素提取 / 分镜保存
|
-> 下载 / 上传 / 抽帧 / Vision / 清洗 / 元素提取 / 分镜保存 / 音频文案改写 / MiniMax 配音
|
||||||
-> jobs/<jobId>/state.json + 图片文件落盘</pre>
|
-> jobs/<jobId>/state.json + 图片文件落盘</pre>
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
@@ -647,6 +648,7 @@ api/main.py
|
|||||||
video_url, duration, width, height,
|
video_url, duration, width, height,
|
||||||
frames: KeyFrame[],
|
frames: KeyFrame[],
|
||||||
transcript: TranscriptSegment[],
|
transcript: TranscriptSegment[],
|
||||||
|
audio_script: AudioScript,
|
||||||
storyboard_images?: StoryboardImage[]
|
storyboard_images?: StoryboardImage[]
|
||||||
}</pre>
|
}</pre>
|
||||||
</div>
|
</div>
|
||||||
@@ -692,6 +694,23 @@ api/main.py
|
|||||||
cutout_id,
|
cutout_id,
|
||||||
subject_kind: object | living,
|
subject_kind: object | living,
|
||||||
subject_assets: SubjectAsset[]
|
subject_assets: SubjectAsset[]
|
||||||
|
}</pre>
|
||||||
|
</div>
|
||||||
|
<div class="card">
|
||||||
|
<h3>AudioScript</h3>
|
||||||
|
<p>音频文案轨的结构化产物。<code>pipeline_transcribe</code> 在 ASR 和翻译后写入:先生成 SKG 口播改写稿,再用 MiniMax T2A 生成配音文件。</p>
|
||||||
|
<pre>AudioScript {
|
||||||
|
status: idle | rewriting | completed | failed,
|
||||||
|
source_text,
|
||||||
|
source_zh,
|
||||||
|
rewritten_text,
|
||||||
|
product_brief,
|
||||||
|
rewrite_model,
|
||||||
|
voice_provider: minimax,
|
||||||
|
voice_model,
|
||||||
|
voice_id,
|
||||||
|
voice_url,
|
||||||
|
error
|
||||||
}</pre>
|
}</pre>
|
||||||
</div>
|
</div>
|
||||||
<div class="card">
|
<div class="card">
|
||||||
@@ -769,7 +788,9 @@ SubjectAsset {
|
|||||||
<tr><td>创建任务</td><td><code>POST /jobs</code></td><td><code>createJob</code></td><td>提交 TK 链接,后台开始下载,停在 downloaded 等用户点解析。</td></tr>
|
<tr><td>创建任务</td><td><code>POST /jobs</code></td><td><code>createJob</code></td><td>提交 TK 链接,后台开始下载,停在 downloaded 等用户点解析。</td></tr>
|
||||||
<tr><td>上传视频</td><td><code>POST /jobs/upload</code></td><td><code>uploadJob</code></td><td>保存 source.mp4,然后同样进入下载完成状态。</td></tr>
|
<tr><td>上传视频</td><td><code>POST /jobs/upload</code></td><td><code>uploadJob</code></td><td>保存 source.mp4,然后同样进入下载完成状态。</td></tr>
|
||||||
<tr><td>删除输入视频</td><td><code>DELETE /jobs/{id}</code></td><td><code>deleteJob</code></td><td>从任务队列、URL 和磁盘 <code>jobs/<id></code> 目录移除整个 job,包括源视频、关键帧、元素提取图和生成视频。</td></tr>
|
<tr><td>删除输入视频</td><td><code>DELETE /jobs/{id}</code></td><td><code>deleteJob</code></td><td>从任务队列、URL 和磁盘 <code>jobs/<id></code> 目录移除整个 job,包括源视频、关键帧、元素提取图和生成视频。</td></tr>
|
||||||
<tr><td>解析视频</td><td><code>POST /jobs/{id}/analyze?frames=&target=&mode=&quality=</code></td><td><code>analyzeJob</code></td><td>拆轨 + 目标化抽关键帧。<code>target</code> 支持透明骨架人、综合、清晰主体、转场变化、表情瞬间、动作峰值;当前 UI 默认 <code>transparent_human</code>。透明骨架人目标会先扩大本地候选池,再调用 Vision 按 6 个分数验收;不合格候选自动丢弃并抽下一候选。<code>mode=append</code> 追加新关键帧;<code>quality=auto</code> 根据本机算力和视频时长自动选择快速、精细或极准。多个抽帧请求进入后端队列顺序处理。</td></tr>
|
<tr><td>解析视频</td><td><code>POST /jobs/{id}/analyze?frames=&target=&mode=&quality=</code></td><td><code>analyzeJob</code></td><td>拆轨 + 目标化抽关键帧。默认 <code>frames=12</code>;<code>target</code> 支持透明骨架人、综合、清晰主体、转场变化、表情瞬间、动作峰值;当前 UI 默认 <code>transparent_human</code>。透明骨架人目标会先扩大本地候选池,再调用 Vision 按 6 个分数验收;不合格候选自动丢弃并抽下一候选。<code>mode=append</code> 追加新关键帧;<code>quality=auto</code> 根据本机算力和视频时长自动选择快速、精细或极准。多个抽帧请求进入后端队列顺序处理。</td></tr>
|
||||||
|
<tr><td>音频文案轨</td><td><code>POST /jobs/{id}/transcribe</code></td><td><code>triggerTranscribe</code></td><td>读取拆轨得到的 <code>audio.wav</code>,先 ASR 得到英文时间戳段落,再翻译中文,随后按 <code>AUDIO_PRODUCT_BRIEF</code> 生成 <code>audio_script.rewritten_text</code>;配置 <code>MINIMAX_API_KEY</code> 后调用 MiniMax T2A 生成 <code>audio_script.voice_url</code>。</td></tr>
|
||||||
|
<tr><td>改写配音文件</td><td><code>GET /jobs/{id}/audio-script.mp3</code></td><td><code>apiAssetUrl(job.audio_script.voice_url)</code></td><td>返回 MiniMax T2A 生成的 mp3。没有配置 MiniMax 或生成失败时该文件不存在,但改写文案仍会保存在 <code>audio_script.rewritten_text</code>。</td></tr>
|
||||||
<tr><td>手动加帧</td><td><code>POST /jobs/{id}/frames?t=</code></td><td><code>addManualFrame</code></td><td>按视频时间戳抽一帧,index 递增但 frames 按 timestamp 排序。</td></tr>
|
<tr><td>手动加帧</td><td><code>POST /jobs/{id}/frames?t=</code></td><td><code>addManualFrame</code></td><td>按视频时间戳抽一帧,index 递增但 frames 按 timestamp 排序。</td></tr>
|
||||||
<tr><td>Vision 识别</td><td><code>POST /frames/{idx}/describe</code></td><td><code>describeFrame</code></td><td>写入 frame.description,后续可从 objects 加候选元素。</td></tr>
|
<tr><td>Vision 识别</td><td><code>POST /frames/{idx}/describe</code></td><td><code>describeFrame</code></td><td>写入 frame.description,后续可从 objects 加候选元素。</td></tr>
|
||||||
<tr><td>清洗水印</td><td><code>POST /frames/{idx}/cleanup</code></td><td><code>cleanupFrame</code></td><td>支持全图和区域清洗,生成 cleaned 待应用版本;前端批量清洗会顺序调用该接口,不自动覆盖原图。单帧清洗状态按 frame.index 隔离,清洗某一张不会禁用其他关键帧的清洗按钮。</td></tr>
|
<tr><td>清洗水印</td><td><code>POST /frames/{idx}/cleanup</code></td><td><code>cleanupFrame</code></td><td>支持全图和区域清洗,生成 cleaned 待应用版本;前端批量清洗会顺序调用该接口,不自动覆盖原图。单帧清洗状态按 frame.index 隔离,清洗某一张不会禁用其他关键帧的清洗按钮。</td></tr>
|
||||||
@@ -815,10 +836,10 @@ SubjectAsset {
|
|||||||
<td><code>StoryboardWorkbench</code>、<code>updateStoryboard</code></td>
|
<td><code>StoryboardWorkbench</code>、<code>updateStoryboard</code></td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td><span class="tag gray">ASR / Translate / Rewrite</span></td>
|
<td><span class="tag gray">Audio / ASR / Rewrite</span></td>
|
||||||
<td>未来的文案轨,目前部分占位或受 audio 阻塞。</td>
|
<td>独立声音文案轨:从 <code>audio.wav</code> 提取原始口播、翻译中文、改写成 SKG 产品语境口播;MiniMax T2A 配置后生成配音 mp3。主画布的 <code>AudioNode</code> 只展示模型链路、改写稿和配音播放器。</td>
|
||||||
<td>不要阻断视觉素材管线。</td>
|
<td>不要阻断视觉素材管线。</td>
|
||||||
<td><code>ASRNode</code>、<code>TranslateNode</code>、<code>RewriteNode</code>、ASR 接口</td>
|
<td><code>AudioNode</code>、<code>ASRNode</code>、<code>TranslateNode</code>、<code>RewriteNode</code>、<code>pipeline_transcribe</code>、<code>AudioScript</code></td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td><span class="tag green">Video / Compose</span></td>
|
<td><span class="tag green">Video / Compose</span></td>
|
||||||
@@ -843,15 +864,16 @@ SubjectAsset {
|
|||||||
<li>Vision 识别关键帧,输出 scene、objects、style、suggested_prompt,并作为主体候选来源。</li>
|
<li>Vision 识别关键帧,输出 scene、objects、style、suggested_prompt,并作为主体候选来源。</li>
|
||||||
<li>主体候选确认、改名、删除和主体资产包生成。</li>
|
<li>主体候选确认、改名、删除和主体资产包生成。</li>
|
||||||
<li>分镜工作台 4 图槽和改造说明自动保存。</li>
|
<li>分镜工作台 4 图槽和改造说明自动保存。</li>
|
||||||
|
<li>音频文案轨:ASR/翻译后自动生成 SKG 口播改写稿;配置 MiniMax 后生成配音 mp3。</li>
|
||||||
<li>nano-banana-pro image-to-image 生图。</li>
|
<li>nano-banana-pro image-to-image 生图。</li>
|
||||||
</ul>
|
</ul>
|
||||||
</div>
|
</div>
|
||||||
<div class="card">
|
<div class="card">
|
||||||
<h3>阻塞 / 占位</h3>
|
<h3>阻塞 / 占位</h3>
|
||||||
<ul>
|
<ul>
|
||||||
<li>ASR:SKG 网关 audio endpoint 404 或渠道不可用。</li>
|
<li>ASR:仍依赖当前 OpenAI-compatible 音频转写入口;如果该网关 audio endpoint 不通,文案提取仍会失败。</li>
|
||||||
<li>Translate:本身 text 通,但产品流里依赖 ASR 段落。</li>
|
<li>MiniMax:当前接入的是官方 T2A 配音能力,不是 ASR;API Key 只能放本地环境变量,不能写入仓库。</li>
|
||||||
<li>Rewrite:需要 SKG 产品信息模板和目标脚本结构。</li>
|
<li>Audio Product Brief:默认是通用 SKG 放松产品卖点,后续可改成跟已选产品库条目联动。</li>
|
||||||
<li>Video Gen:模型层按业务保留 Seedance / Kling / Veo/Voe 选择;后端已支持 Poe 视频通道,别名默认映射到 <code>seedance-2-fast</code>、<code>kling-omni</code>、<code>veo-3.1-fast</code>,提交后写入 Video Gen 节点。</li>
|
<li>Video Gen:模型层按业务保留 Seedance / Kling / Veo/Voe 选择;后端已支持 Poe 视频通道,别名默认映射到 <code>seedance-2-fast</code>、<code>kling-omni</code>、<code>veo-3.1-fast</code>,提交后写入 Video Gen 节点。</li>
|
||||||
<li>Compose:还没做本地 ffmpeg 字幕/TTS 合成。</li>
|
<li>Compose:还没做本地 ffmpeg 字幕/TTS 合成。</li>
|
||||||
</ul>
|
</ul>
|
||||||
@@ -892,6 +914,31 @@ SubjectAsset {
|
|||||||
<h2>变更记录</h2>
|
<h2>变更记录</h2>
|
||||||
<p>这个记录不是 git log 的替代品。它记录“产品理解发生了什么变化、影响了哪些源码、你以后描述需求时该怎么说”。后续每次改功能都要补一条。</p>
|
<p>这个记录不是 git log 的替代品。它记录“产品理解发生了什么变化、影响了哪些源码、你以后描述需求时该怎么说”。后续每次改功能都要补一条。</p>
|
||||||
<div class="changelog">
|
<div class="changelog">
|
||||||
|
<article class="change">
|
||||||
|
<header>
|
||||||
|
<h3>2026-05-14 · 音频处理接入 SKG 口播改写与 MiniMax 配音</h3>
|
||||||
|
<span class="tag gray">Audio</span>
|
||||||
|
<span class="tag green">MiniMax</span>
|
||||||
|
</header>
|
||||||
|
<div class="body">
|
||||||
|
<p><strong>问题:</strong>音频处理节点之前只说明“音轨 → ASR → 翻译 → 改写”,没有真实改写产物,也没有配音输出;用户无法直接拿到符合 SKG 产品语境的口播。</p>
|
||||||
|
<p><strong>改动:</strong><code>Job</code> 新增 <code>audio_script</code>,<code>pipeline_transcribe</code> 在 ASR 和翻译后生成 SKG 改写文案,并在配置 <code>MINIMAX_API_KEY</code> 时调用 MiniMax T2A 输出 <code>/jobs/{id}/audio-script.mp3</code>。前端 <code>AudioNode</code> 和侧栏 Rewrite 区显示模型链路、改写文案和配音播放器。</p>
|
||||||
|
<p><strong>边界:</strong>MiniMax 官方 Speech API 当前接入的是 TTS 配音,不替代 ASR;原始音频文案提取仍走现有 OpenAI-compatible audio transcription 入口。</p>
|
||||||
|
<p><strong>影响:</strong><code>api/main.py</code>、<code>api/.env.example</code>、<code>api/README.md</code>、<code>web/lib/api.ts</code>、<code>web/components/nodes/index.tsx</code>、<code>web/components/dashboard.tsx</code>、<code>web/app/page.tsx</code>、<code>docs/source-analysis.html</code>。</p>
|
||||||
|
</div>
|
||||||
|
</article>
|
||||||
|
<article class="change">
|
||||||
|
<header>
|
||||||
|
<h3>2026-05-14 · 默认抽帧张数改为 12 帧</h3>
|
||||||
|
<span class="tag violet">InputNode</span>
|
||||||
|
<span class="tag blue">抽帧</span>
|
||||||
|
</header>
|
||||||
|
<div class="body">
|
||||||
|
<p><strong>问题:</strong>透明骨架人主题需要更稳定的素材覆盖,默认 5 帧太少,容易缺少可用于主体、场景和产品融合的角度。</p>
|
||||||
|
<p><strong>改动:</strong>后端 <code>KEYFRAME_COUNT</code> 默认值、前端抽帧 fallback、API client 默认参数都改为 12;抽帧设置里的张数选项把 12 放到第一位。透明骨架人目标仍会对每个候选做 Vision 验收,不合格候选自动换下一帧。</p>
|
||||||
|
<p><strong>影响:</strong><code>api/main.py</code>、<code>api/.env.example</code>、<code>web/lib/api.ts</code>、<code>web/app/page.tsx</code>、<code>web/components/nodes/index.tsx</code>、<code>docs/source-analysis.html</code>。</p>
|
||||||
|
</div>
|
||||||
|
</article>
|
||||||
<article class="change">
|
<article class="change">
|
||||||
<header>
|
<header>
|
||||||
<h3>2026-05-14 · 抽帧新增透明骨架人 AI 验收目标</h3>
|
<h3>2026-05-14 · 抽帧新增透明骨架人 AI 验收目标</h3>
|
||||||
|
|||||||
@@ -180,7 +180,7 @@ export default function Home() {
|
|||||||
const targetJob = jobs.find((item) => item.id === jobId)
|
const targetJob = jobs.find((item) => item.id === jobId)
|
||||||
if (!targetJob) return
|
if (!targetJob) return
|
||||||
const frameTarget = frameTargets[jobId] ?? "transparent_human"
|
const frameTarget = frameTargets[jobId] ?? "transparent_human"
|
||||||
const frameCount = frameCounts[jobId] ?? 5
|
const frameCount = frameCounts[jobId] ?? 12
|
||||||
const frameQuality = frameQualities[jobId] ?? "auto"
|
const frameQuality = frameQualities[jobId] ?? "auto"
|
||||||
const mode = options?.mode ?? (targetJob.frames.length > 0 ? "append" : "replace")
|
const mode = options?.mode ?? (targetJob.frames.length > 0 ? "append" : "replace")
|
||||||
setActiveJobId(jobId)
|
setActiveJobId(jobId)
|
||||||
@@ -892,7 +892,7 @@ export default function Home() {
|
|||||||
visual: !!job && (job.frames.length > 0 || (job.generated_videos?.length ?? 0) > 0),
|
visual: !!job && (job.frames.length > 0 || (job.generated_videos?.length ?? 0) > 0),
|
||||||
asr: !!job && job.transcript.length > 0,
|
asr: !!job && job.transcript.length > 0,
|
||||||
translate: !!job && (job.transcript.some((s) => s.zh) ?? false),
|
translate: !!job && (job.transcript.some((s) => s.zh) ?? false),
|
||||||
rewrite: !!job && (job.transcript.some((s) => s.zh) ?? false),
|
rewrite: !!job && !!job.audio_script?.rewritten_text,
|
||||||
}
|
}
|
||||||
setEdges((prev) => prev.map((e) => ({ ...e, animated: !!doneOf[e.source] })))
|
setEdges((prev) => prev.map((e) => ({ ...e, animated: !!doneOf[e.source] })))
|
||||||
}, [job, setEdges])
|
}, [job, setEdges])
|
||||||
|
|||||||
@@ -568,7 +568,7 @@ export const Dashboard = forwardRef<DashboardHandle, Props>(function Dashboard({
|
|||||||
!hasTranscript ? (
|
!hasTranscript ? (
|
||||||
<KanbanCard tone={key === "asr" ? "blue" : "cyan"} tags={[key === "asr" ? "ASR" : "Translate"]} title="等待数据">
|
<KanbanCard tone={key === "asr" ? "blue" : "cyan"} tags={[key === "asr" ? "ASR" : "Translate"]} title="等待数据">
|
||||||
<div className="text-[11.5px] text-[var(--text-soft)]">
|
<div className="text-[11.5px] text-[var(--text-soft)]">
|
||||||
{colState.asr === "running" ? "Gemini 转录中…" : "需要先完成关键帧抽取"}
|
{colState.asr === "running" ? "音频转写中…" : "需要先完成关键帧抽取"}
|
||||||
</div>
|
</div>
|
||||||
</KanbanCard>
|
</KanbanCard>
|
||||||
) : (
|
) : (
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ const STAGES: { key: JobStatus; label: string }[] = [
|
|||||||
{ key: "downloading", label: "下载视频" },
|
{ key: "downloading", label: "下载视频" },
|
||||||
{ key: "splitting", label: "拆分音视频" },
|
{ key: "splitting", label: "拆分音视频" },
|
||||||
{ key: "frames_extracted", label: "抽取关键帧" },
|
{ key: "frames_extracted", label: "抽取关键帧" },
|
||||||
{ key: "transcribing", label: "Gemini 转录+翻译" },
|
{ key: "transcribing", label: "音频转写+改写" },
|
||||||
{ key: "transcribed", label: "完成" },
|
{ key: "transcribed", label: "完成" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
@@ -137,7 +137,7 @@ const FRAME_TARGET_OPTIONS: Array<{ value: FrameExtractTarget; label: string; hi
|
|||||||
{ value: "expression", label: "表情瞬间", hint: "人物 / 动物表情倾向" },
|
{ value: "expression", label: "表情瞬间", hint: "人物 / 动物表情倾向" },
|
||||||
{ value: "motion", label: "动作峰值", hint: "动作变化更明显" },
|
{ value: "motion", label: "动作峰值", hint: "动作变化更明显" },
|
||||||
]
|
]
|
||||||
const FRAME_COUNT_OPTIONS = [3, 5, 8, 12]
|
const FRAME_COUNT_OPTIONS = [12, 8, 5, 3]
|
||||||
const FRAME_QUALITY_OPTIONS: Array<{ value: FrameExtractQuality; label: string; hint: string }> = [
|
const FRAME_QUALITY_OPTIONS: Array<{ value: FrameExtractQuality; label: string; hint: string }> = [
|
||||||
{ value: "auto", label: "自动", hint: "按电脑性能和视频时长自动选择" },
|
{ value: "auto", label: "自动", hint: "按电脑性能和视频时长自动选择" },
|
||||||
{ value: "fast", label: "快速", hint: "2fps / 360px,长视频省电" },
|
{ value: "fast", label: "快速", hint: "2fps / 360px,长视频省电" },
|
||||||
@@ -573,7 +573,7 @@ export function InputNode({ data, selected }: NodeProps<{ data: NodeData }> | an
|
|||||||
const thumbNaturalWidth = ready && j.height ? Math.max(96, Math.round(THUMBNAIL_HEIGHT * j.width / j.height)) : 96
|
const thumbNaturalWidth = ready && j.height ? Math.max(96, Math.round(THUMBNAIL_HEIGHT * j.width / j.height)) : 96
|
||||||
const toolWidth = Math.max(148, thumbNaturalWidth)
|
const toolWidth = Math.max(148, thumbNaturalWidth)
|
||||||
const target = d.frameTargets[j.id] ?? "transparent_human"
|
const target = d.frameTargets[j.id] ?? "transparent_human"
|
||||||
const count = d.frameCounts[j.id] ?? 5
|
const count = d.frameCounts[j.id] ?? 12
|
||||||
const quality = d.frameQualities[j.id] ?? "auto"
|
const quality = d.frameQualities[j.id] ?? "auto"
|
||||||
const jHasFrames = j.frames.length > 0
|
const jHasFrames = j.frames.length > 0
|
||||||
const jRunning = ["splitting", "transcribing"].includes(j.status)
|
const jRunning = ["splitting", "transcribing"].includes(j.status)
|
||||||
@@ -813,7 +813,7 @@ export function VideoFramePanelNode({ data }: any) {
|
|||||||
const frames = [...panelJob.frames].sort((a, b) => a.timestamp - b.timestamp)
|
const frames = [...panelJob.frames].sort((a, b) => a.timestamp - b.timestamp)
|
||||||
const aspect = panelJob.width && panelJob.height ? `${panelJob.width}/${panelJob.height}` : "9/16"
|
const aspect = panelJob.width && panelJob.height ? `${panelJob.width}/${panelJob.height}` : "9/16"
|
||||||
const panelTarget = d.frameTargets[panelJob.id] ?? "transparent_human"
|
const panelTarget = d.frameTargets[panelJob.id] ?? "transparent_human"
|
||||||
const panelCount = d.frameCounts[panelJob.id] ?? 5
|
const panelCount = d.frameCounts[panelJob.id] ?? 12
|
||||||
const panelQuality = d.frameQualities[panelJob.id] ?? "auto"
|
const panelQuality = d.frameQualities[panelJob.id] ?? "auto"
|
||||||
const panelRunning = ["splitting", "transcribing"].includes(panelJob.status)
|
const panelRunning = ["splitting", "transcribing"].includes(panelJob.status)
|
||||||
const dockText: Record<CanvasPanelDock, string> = {
|
const dockText: Record<CanvasPanelDock, string> = {
|
||||||
@@ -1997,7 +1997,7 @@ export function KeyframePanelNode({ data }: any) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* ============================================================
|
/* ============================================================
|
||||||
5. ASRNode — Gemini 转录
|
5. ASRNode — 音频转写
|
||||||
============================================================ */
|
============================================================ */
|
||||||
export function ASRNode({ data, selected }: any) {
|
export function ASRNode({ data, selected }: any) {
|
||||||
const d: NodeData = data
|
const d: NodeData = data
|
||||||
@@ -2012,7 +2012,7 @@ export function ASRNode({ data, selected }: any) {
|
|||||||
onTogglePin={() => d.onToggleNodePin?.("asr")}
|
onTogglePin={() => d.onToggleNodePin?.("asr")}
|
||||||
>
|
>
|
||||||
<div className="text-[11.5px] text-[var(--text-soft)]">
|
<div className="text-[11.5px] text-[var(--text-soft)]">
|
||||||
Gemini 2.5 · 英文带时间戳分段
|
OpenAI-compatible ASR · 英文带时间戳分段
|
||||||
</div>
|
</div>
|
||||||
{d.job && d.job.transcript.length > 0 && (
|
{d.job && d.job.transcript.length > 0 && (
|
||||||
<div className="mt-2 max-h-24 overflow-y-auto text-[11px] space-y-1 text-[var(--text-strong)]">
|
<div className="mt-2 max-h-24 overflow-y-auto text-[11px] space-y-1 text-[var(--text-strong)]">
|
||||||
@@ -2069,13 +2069,14 @@ export function TranslateNode({ data, selected }: any) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* ============================================================
|
/* ============================================================
|
||||||
7. RewriteNode (placeholder)
|
7. RewriteNode
|
||||||
============================================================ */
|
============================================================ */
|
||||||
export function RewriteNode({ data, selected }: any) {
|
export function RewriteNode({ data, selected }: any) {
|
||||||
const d: NodeData = data
|
const d: NodeData = data
|
||||||
|
const rewrittenText = d.job?.audio_script?.rewritten_text?.trim() ?? ""
|
||||||
return (
|
return (
|
||||||
<NodeShell
|
<NodeShell
|
||||||
type="ai" status="pending"
|
type="ai" status={rewrittenText ? "done" : d.job?.audio_script?.status === "rewriting" ? "running" : "pending"}
|
||||||
icon={<FileEdit className="h-4 w-4" />}
|
icon={<FileEdit className="h-4 w-4" />}
|
||||||
title="产品文案 · Rewrite"
|
title="产品文案 · Rewrite"
|
||||||
subtitle="STEP 5 · 接 SKG 卖点"
|
subtitle="STEP 5 · 接 SKG 卖点"
|
||||||
@@ -2083,19 +2084,20 @@ export function RewriteNode({ data, selected }: any) {
|
|||||||
pinned={d.pinnedNodes?.has("rewrite")}
|
pinned={d.pinnedNodes?.has("rewrite")}
|
||||||
onTogglePin={() => d.onToggleNodePin?.("rewrite")}
|
onTogglePin={() => d.onToggleNodePin?.("rewrite")}
|
||||||
>
|
>
|
||||||
<textarea
|
{rewrittenText ? (
|
||||||
placeholder="粘贴 SKG 产品信息 / 关键卖点(可作为视频脚本和镜头动作参考)"
|
<div className="rounded-md border border-emerald-400/25 bg-emerald-400/10 px-2.5 py-2 text-[11.5px] leading-relaxed text-[var(--text-strong)]">
|
||||||
rows={3}
|
{rewrittenText}
|
||||||
disabled
|
</div>
|
||||||
className="w-full text-[11.5px] px-2.5 py-2 rounded-md bg-white/30 dark:bg-white/[0.03] border border-dashed border-black/15 dark:border-white/10 placeholder:text-[var(--text-faint)] text-[var(--text-strong)] resize-none opacity-70"
|
) : (
|
||||||
/>
|
<div className="text-[11px] text-[var(--text-soft)]">转录完成后自动接 SKG 卖点改写。</div>
|
||||||
<div className="mt-1.5 text-[10px] text-[var(--text-faint)]">下一冲刺接入</div>
|
)}
|
||||||
|
<div className="mt-1.5 text-[10px] text-[var(--text-faint)]">{d.job?.audio_script?.rewrite_model || "AUDIO_REWRITE_MODEL"}</div>
|
||||||
</NodeShell>
|
</NodeShell>
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ============================================================
|
/* ============================================================
|
||||||
5b. AudioNode — 合并 ASR + 翻译 + 改写为一个"音频处理"节点(占位卡片,无填充)
|
5b. AudioNode — 合并 ASR + 翻译 + 改写 + MiniMax 配音
|
||||||
============================================================ */
|
============================================================ */
|
||||||
export function AudioNode({ data, selected }: any) {
|
export function AudioNode({ data, selected }: any) {
|
||||||
const d: NodeData = data
|
const d: NodeData = data
|
||||||
@@ -2127,7 +2129,7 @@ export function AudioNode({ data, selected }: any) {
|
|||||||
<div>
|
<div>
|
||||||
音轨 → ASR 转录 → 英中翻译 → SKG 口播改写 → MiniMax 配音<br />
|
音轨 → ASR 转录 → 英中翻译 → SKG 口播改写 → MiniMax 配音<br />
|
||||||
<span className="text-[var(--text-faint)] font-mono">
|
<span className="text-[var(--text-faint)] font-mono">
|
||||||
{audioScript?.rewrite_model || "Gemini 2.5 Pro"} → {audioScript?.voice_model || "MiniMax T2A"}
|
{audioScript?.rewrite_model || "AUDIO_REWRITE_MODEL"} → {audioScript?.voice_model || "MiniMax T2A"}
|
||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
{rewrittenText && (
|
{rewrittenText && (
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ export function TranscriptPanel({ segments, loading, onSeek }: Props) {
|
|||||||
if (segments.length === 0) {
|
if (segments.length === 0) {
|
||||||
return (
|
return (
|
||||||
<div className="glass-card flex h-64 items-center justify-center text-white/30 text-sm">
|
<div className="glass-card flex h-64 items-center justify-center text-white/30 text-sm">
|
||||||
{loading ? "Gemini 转录中…" : "转录将在抽帧后自动开始"}
|
{loading ? "音频转写中…" : "转录将在抽帧后自动开始"}
|
||||||
</div>
|
</div>
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
@@ -26,10 +26,10 @@ export function TranscriptPanel({ segments, loading, onSeek }: Props) {
|
|||||||
<div className="glass-card overflow-hidden">
|
<div className="glass-card overflow-hidden">
|
||||||
<div className="grid grid-cols-2 divide-x divide-white/10">
|
<div className="grid grid-cols-2 divide-x divide-white/10">
|
||||||
<div className="p-4">
|
<div className="p-4">
|
||||||
<div className="mb-3 text-[11px] uppercase tracking-widest text-white/40">English (Gemini ASR)</div>
|
<div className="mb-3 text-[11px] uppercase tracking-widest text-white/40">English ASR</div>
|
||||||
</div>
|
</div>
|
||||||
<div className="p-4">
|
<div className="p-4">
|
||||||
<div className="mb-3 text-[11px] uppercase tracking-widest text-white/40">中文翻译 (Gemini)</div>
|
<div className="mb-3 text-[11px] uppercase tracking-widest text-white/40">中文翻译</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div className="max-h-[480px] overflow-y-auto">
|
<div className="max-h-[480px] overflow-y-auto">
|
||||||
|
|||||||
@@ -441,7 +441,7 @@ export async function triggerTranscribe(id: string): Promise<Job> {
|
|||||||
|
|
||||||
export async function analyzeJob(
|
export async function analyzeJob(
|
||||||
id: string,
|
id: string,
|
||||||
frames = 5,
|
frames = 12,
|
||||||
target: FrameExtractTarget = "balanced",
|
target: FrameExtractTarget = "balanced",
|
||||||
mode: FrameExtractMode = "replace",
|
mode: FrameExtractMode = "replace",
|
||||||
quality: FrameExtractQuality = "auto",
|
quality: FrameExtractQuality = "auto",
|
||||||
|
|||||||
Reference in New Issue
Block a user