From 9c41cafb1a25eeec169f4384922819a64f216876 Mon Sep 17 00:00:00 2001 From: kang Date: Sat, 30 May 2026 17:04:45 +0800 Subject: [PATCH] feat: use OpenAI Sora for long videos --- .env.local.example | 8 +- .project.json | 4 +- RULES.md | 15 +- deploy/.env.production.example | 8 +- src/app/api/video/generate/route.ts | 15 +- src/app/api/video/status/[taskId]/route.ts | 45 ++++- src/app/page.tsx | 4 +- src/lib/storage.ts | 18 ++ src/lib/templates.ts | 30 +-- src/lib/types.ts | 6 +- src/lib/videoProviders.ts | 221 ++++++++++++++++++++- 11 files changed, 327 insertions(+), 47 deletions(-) diff --git a/.env.local.example b/.env.local.example index 2774757..8e9db13 100644 --- a/.env.local.example +++ b/.env.local.example @@ -4,7 +4,13 @@ GPT_TEXT_MODEL=gpt-5.5 GPT_IMAGE_MODEL=gpt-image-2 GPT_API_BASE=https://api.openai.com/v1 -# 视频生成固定走 Seedance。未配置 Key 时 /api/video/generate 返回 503。 +# 视频生成默认走 OpenAI Sora;如需回退 Seedance,设置 VIDEO_PROVIDER=seedance。 +VIDEO_PROVIDER=openai_sora +OPENAI_VIDEO_MODEL=sora-2-pro +OPENAI_VIDEO_MIN_SECONDS=60 +OPENAI_VIDEO_QUALITY=high + +# Seedance 作为可选回退 provider。未配置 Key 且 VIDEO_PROVIDER=seedance 时 /api/video/generate 返回 503。 SEEDANCE_API_KEY= SEEDANCE_MODEL=doubao-seedance-2-0-260128 SEEDANCE_API_BASE=https://ark.cn-beijing.volces.com/api/v3 diff --git a/.project.json b/.project.json index a393c8e..e7a7e74 100644 --- a/.project.json +++ b/.project.json @@ -4,7 +4,7 @@ { "env" : "OPENAI_API_KEY", "name" : "OPENAI_API_KEY", - "note" : "GPT 文本\/结构化\/图片生成;没填则图片 mock" + "note" : "GPT 文本\/结构化\/图片生成 + OpenAI Sora 视频;没填则图片 mock、视频不可用" }, { "env" : "SEEDANCE_API_KEY", @@ -37,7 +37,7 @@ "username" : "kangwan" }, "stack" : [ - "Next.js + GPT + Seedance", + "Next.js + GPT + OpenAI Sora", "Docker Compose local\/prod parity", "Coolify Traefik" ], diff --git a/RULES.md b/RULES.md index 36be117..41687c6 100644 --- a/RULES.md +++ b/RULES.md @@ -10,6 +10,7 @@ - 平台:个人 VPS `76.13.31.179`,Docker Compose,接入现有 Coolify Traefik - 发布状态:VPS 生产已发布,仅个人使用 - 最近生产部署:2026-05-22,视频面板修复 60 秒成片任务 ID 映射;`video_turntable_60s` 等已完成视频会替代对应默认模板卡片,不再重复显示不可播放的空视频项;对应代码提交 `7abbb7d` +- 待部署变更:视频 provider 已改为默认 OpenAI Sora(`VIDEO_PROVIDER=openai_sora`),Seedance 仅作为可选回退;视频模板目标时长统一不少于 60 秒,并通过 OpenAI extend 链路补足长视频 - 服务名 / 容器名:`ai-toy-patent-workflow` - 服务器路径:`/opt/ai-toy-patent-workflow` - 主站 / 前端:https://ai-toy.kang-kang.com @@ -38,11 +39,15 @@ - 风格示意图:运行 `npm run styles:previews -- --force` 用 GPT 图片模型生成 `public/style-previews/*.png`;UI 左侧风格卡片直接引用这些小图 ## 环境变量 -- `OPENAI_API_KEY` — GPT API Key;文本/结构化/图片生成统一走 GPT 最高规格配置 +- `OPENAI_API_KEY` — GPT / OpenAI API Key;文本、结构化、图片生成以及默认 OpenAI Sora 视频生成共用 - `GPT_TEXT_MODEL` — 默认 `gpt-5.5`,用于角色设定等结构化输出 - `GPT_IMAGE_MODEL` — 默认 `gpt-image-2`,用于意向图和三类素材包图片生成 - `GPT_API_BASE` — 默认 `https://api.openai.com/v1` -- `SEEDANCE_API_KEY` — Seedance 视频生成 Key;未配置时视频接口返回 503 +- `VIDEO_PROVIDER` — 默认 `openai_sora`;需要回退时可设为 `seedance` +- `OPENAI_VIDEO_MODEL` — 默认 `sora-2-pro`,用于 OpenAI 视频生成 +- `OPENAI_VIDEO_MIN_SECONDS` — 默认 `60`;视频模板目标时长不得低于 60 秒 +- `OPENAI_VIDEO_QUALITY` — 默认生产建议 `high`,对应 OpenAI 允许的视频输出尺寸 +- `SEEDANCE_API_KEY` — Seedance 视频生成 Key;仅 `VIDEO_PROVIDER=seedance` 时使用,未配置则视频接口返回 503 - `SEEDANCE_MODEL` — 默认 `doubao-seedance-2-0-260128` - `SEEDANCE_API_BASE` — 默认 `https://ark.cn-beijing.volces.com/api/v3` - `PUBLIC_APP_URL` — 生产填公网入口,用于把 `/api/img/...` 补成 Seedance 可访问的绝对 URL @@ -57,7 +62,7 @@ ## 规则 - 全项目规则真源:`/Users/kangwan/Projects/code/20260317-rules-dashboard/RULES.md` - 文本/结构化/图片生成统一使用 GPT 最高规格配置 -- 视频生成固定使用 Seedance +- 视频生成默认使用 OpenAI Sora;Seedance 只作为可选回退 provider - 不允许编造不存在的部署域名、账号、密码 ## 图像链路事实 @@ -95,10 +100,10 @@ 5. 锁定角色设定 `CharacterSpec` 6. 串行生成图片包:必须从专利包开始,顺序为 `专利包 -> 配件包 -> 生产打样包 -> 宣发包` 7. 前一个图片包完整生成后,下一个图片包才解锁;不提供“一键全包”入口或全包 API -8. 四个图片包完成后,才解锁文案模板和 Seedance 视频任务:旋转展示、开箱、触感细节、角色故事 +8. 四个图片包完成后,才解锁文案模板和 OpenAI Sora 视频任务:旋转展示、开箱、触感细节、角色故事、工厂预览;每条视频目标时长不少于 60 秒 9. 侧栏保留历史会话,点击切换 ## 后续路线 - 导出专利包:PNG高清 + PDF合订 - ZIP/PDF 打包下载 -- Seedance 任务轮询 UI +- OpenAI Sora 长视频任务轮询 UI 细化 diff --git a/deploy/.env.production.example b/deploy/.env.production.example index 26b909d..9c803f4 100644 --- a/deploy/.env.production.example +++ b/deploy/.env.production.example @@ -4,7 +4,13 @@ GPT_TEXT_MODEL=gpt-5.5 GPT_IMAGE_MODEL=gpt-image-2 GPT_API_BASE=https://api.openai.com/v1 -# Seedance video generation. +# Video generation defaults to OpenAI Sora. Set VIDEO_PROVIDER=seedance only for fallback. +VIDEO_PROVIDER=openai_sora +OPENAI_VIDEO_MODEL=sora-2-pro +OPENAI_VIDEO_MIN_SECONDS=60 +OPENAI_VIDEO_QUALITY=high + +# Optional Seedance fallback. SEEDANCE_API_KEY= SEEDANCE_MODEL=doubao-seedance-2-0-260128 SEEDANCE_API_BASE=https://ark.cn-beijing.volces.com/api/v3 diff --git a/src/app/api/video/generate/route.ts b/src/app/api/video/generate/route.ts index e6c326e..62524fb 100644 --- a/src/app/api/video/generate/route.ts +++ b/src/app/api/video/generate/route.ts @@ -1,6 +1,6 @@ import { NextResponse } from 'next/server'; import { recordEvent } from '@/lib/auditDb'; -import { generateSeedanceVideo } from '@/lib/videoProviders'; +import { activeVideoProvider, generateVideo } from '@/lib/videoProviders'; import { loadSession, saveRemoteVideo, saveSession } from '@/lib/storage'; import { VIDEO_TEMPLATES } from '@/lib/templates'; import type { VideoGenerationRequest, VideoTask } from '@/lib/types'; @@ -10,9 +10,10 @@ export const dynamic = 'force-dynamic'; export async function POST(req: Request) { const body = (await req.json()) as VideoGenerationRequest; + const provider = activeVideoProvider(); try { - recordEvent({ action: 'video.generate_started', sessionId: body.sessionId, targetType: 'video', targetId: body.templateId, status: 'started', provider: 'seedance', metadata: { ratio: body.ratio, duration: body.duration, hasImage: Boolean(body.imageUrl), refs: body.references?.length ?? 0 } }); - const response = await generateSeedanceVideo(body); + recordEvent({ action: 'video.generate_started', sessionId: body.sessionId, targetType: 'video', targetId: body.templateId, status: 'started', provider, metadata: { ratio: body.ratio, duration: body.duration, hasImage: Boolean(body.imageUrl), refs: body.references?.length ?? 0 } }); + const response = await generateVideo(body); let task: VideoTask | undefined; let videoUrl = response.videoUrl; @@ -37,7 +38,7 @@ export async function POST(req: Request) { status: response.status, videoUrl, ratio: body.ratio || template?.ratio || '16:9', - duration: body.duration || template?.duration || 6, + duration: body.duration || template?.duration || 60, submittedAt: now, updatedAt: now, raw: response.raw, @@ -49,13 +50,13 @@ export async function POST(req: Request) { await saveSession(session); } - recordEvent({ action: 'video.generate_submitted', sessionId: body.sessionId, targetType: 'video', targetId: response.taskId ?? body.templateId ?? response.status, status: 'queued', provider: 'seedance', metadata: { status: response.status, templateId: body.templateId } }); + recordEvent({ action: 'video.generate_submitted', sessionId: body.sessionId, targetType: 'video', targetId: response.taskId ?? body.templateId ?? response.status, status: 'queued', provider: response.provider, metadata: { status: response.status, templateId: body.templateId } }); return NextResponse.json({ ...response, videoUrl, task }); } catch (error) { const message = String(error); - recordEvent({ action: 'video.generate_failed', sessionId: body.sessionId, targetType: 'video', targetId: body.templateId, status: 'error', provider: 'seedance', message }); + recordEvent({ action: 'video.generate_failed', sessionId: body.sessionId, targetType: 'video', targetId: body.templateId, status: 'error', provider, message }); return NextResponse.json({ error: message }, { - status: message.includes('SEEDANCE_API_KEY missing') ? 503 : 500, + status: message.includes('SEEDANCE_API_KEY missing') || message.includes('OPENAI_API_KEY missing') ? 503 : 500, }); } } diff --git a/src/app/api/video/status/[taskId]/route.ts b/src/app/api/video/status/[taskId]/route.ts index 65a643a..0f82f08 100644 --- a/src/app/api/video/status/[taskId]/route.ts +++ b/src/app/api/video/status/[taskId]/route.ts @@ -1,7 +1,8 @@ import { NextResponse } from 'next/server'; import { recordEvent } from '@/lib/auditDb'; -import { getSeedanceVideoTask } from '@/lib/videoProviders'; -import { loadSession, saveRemoteVideo, saveSession } from '@/lib/storage'; +import { activeVideoProvider, downloadOpenAIVideoContent, extendOpenAIVideo, getVideoTask, openAIVideoSeconds } from '@/lib/videoProviders'; +import { loadSession, saveRemoteVideo, saveSession, saveVideoBuffer } from '@/lib/storage'; +import type { VideoTask } from '@/lib/types'; export const runtime = 'nodejs'; export const dynamic = 'force-dynamic'; @@ -9,21 +10,43 @@ export const dynamic = 'force-dynamic'; export async function GET(req: Request, ctx: { params: Promise<{ taskId: string }> }) { const { taskId } = await ctx.params; const sessionId = new URL(req.url).searchParams.get('sessionId')?.trim(); + const session = sessionId ? await loadSession(sessionId) : null; + const existingTask = session?.videoTasks?.find(item => item.taskId === taskId); + const provider = existingTask?.provider ?? activeVideoProvider(); try { - const response = await getSeedanceVideoTask(taskId); - let task = undefined; + let response = await getVideoTask(taskId, provider, existingTask?.duration); + let task: VideoTask | undefined = undefined; let videoUrl = response.videoUrl; - if (sessionId) { - const session = await loadSession(sessionId); - if (session?.videoTasks?.length) { + if (sessionId && session) { + if (session.videoTasks?.length) { const index = session.videoTasks.findIndex(item => item.taskId === taskId); if (index >= 0) { - if (videoUrl) { + const targetDuration = session.videoTasks[index].duration; + const currentSeconds = openAIVideoSeconds(response.raw); + + if (response.provider === 'openai_sora' && response.status === 'succeeded' && currentSeconds > 0 && currentSeconds < targetDuration) { + response = await extendOpenAIVideo({ + taskId: response.taskId || taskId, + prompt: session.videoTasks[index].prompt, + currentSeconds, + targetSeconds: targetDuration, + }); + videoUrl = undefined; + } else if (response.provider === 'openai_sora' && response.status === 'succeeded' && response.taskId) { + if (session.videoTasks[index].videoUrl) { + videoUrl = session.videoTasks[index].videoUrl; + } else { + const content = await downloadOpenAIVideoContent(response.taskId); + videoUrl = await saveVideoBuffer(session.id, response.taskId, content.buffer, content.type); + } + } else if (videoUrl) { videoUrl = await saveRemoteVideo(session.id, taskId, videoUrl); } + task = { ...session.videoTasks[index], + taskId: response.taskId ?? session.videoTasks[index].taskId, status: response.status, videoUrl: videoUrl ?? session.videoTasks[index].videoUrl, model: response.model, @@ -40,13 +63,13 @@ export async function GET(req: Request, ctx: { params: Promise<{ taskId: string } } - recordEvent({ action: 'video.status_checked', sessionId, targetType: 'video', targetId: taskId, status: 'ok', provider: 'seedance', metadata: { status: response.status } }); + recordEvent({ action: 'video.status_checked', sessionId, targetType: 'video', targetId: taskId, status: 'ok', provider: response.provider, metadata: { status: response.status, taskId: response.taskId } }); return NextResponse.json({ ...response, videoUrl, task }); } catch (error) { const message = String(error); - recordEvent({ action: 'video.status_failed', sessionId, targetType: 'video', targetId: taskId, status: 'error', provider: 'seedance', message }); + recordEvent({ action: 'video.status_failed', sessionId, targetType: 'video', targetId: taskId, status: 'error', provider, message }); return NextResponse.json({ error: message }, { - status: message.includes('SEEDANCE_API_KEY missing') ? 503 : 500, + status: message.includes('SEEDANCE_API_KEY missing') || message.includes('OPENAI_API_KEY missing') ? 503 : 500, }); } } diff --git a/src/app/page.tsx b/src/app/page.tsx index e7e8419..3c216b1 100644 --- a/src/app/page.tsx +++ b/src/app/page.tsx @@ -550,7 +550,7 @@ export default function Home() { const d: VideoGenerationResponse = await r.json(); await reloadCurrent(sessionId); if ((d.status === 'submitted' || d.status === 'processing') && remaining > 1) { - scheduleVideoRefresh(sessionId, taskId, remaining - 1); + scheduleVideoRefresh(sessionId, d.taskId || taskId, remaining - 1); } } }, 15000); @@ -634,7 +634,7 @@ export default function Home() { }), }); if (!r.ok) { - alert('Seedance 视频提交失败:' + (await r.text())); + alert('视频提交失败:' + (await r.text())); return; } const d: VideoGenerationResponse = await r.json(); diff --git a/src/lib/storage.ts b/src/lib/storage.ts index b9c2b99..5b8f535 100644 --- a/src/lib/storage.ts +++ b/src/lib/storage.ts @@ -457,6 +457,24 @@ export async function saveRemoteVideo(sessionId: string, taskId: string, url: st return `/api/video-file/${filename}`; } +export async function saveVideoBuffer(sessionId: string, taskId: string, buffer: Buffer, type = 'video/mp4'): Promise { + await ensureDirs(); + const ext = extFromMime(type); + const safeTaskId = safePart(taskId); + const filename = `${safePart(sessionId)}_${safeTaskId}.${ext === 'bin' ? 'mp4' : ext}`; + const file = path.join(VIDEO_DIR, filename); + await fs.writeFile(file, buffer); + recordEvent({ + action: 'video.saved', + sessionId, + targetType: 'video', + targetId: taskId, + status: 'ok', + metadata: { filename, bytes: buffer.length, type }, + }); + return `/api/video-file/${filename}`; +} + export async function readVideoFile(filename: string): Promise<{ buf: Buffer; type: string } | null> { try { const info = await statVideoFile(filename); diff --git a/src/lib/templates.ts b/src/lib/templates.ts index 8a162e2..28dcf3b 100644 --- a/src/lib/templates.ts +++ b/src/lib/templates.ts @@ -16,42 +16,42 @@ export const VIDEO_TEMPLATES = [ { id: 'video_turntable', title: '360 度旋转展示', - description: '用于电商和内部评审,展示整体体积、正背侧轮廓。', - duration: 6, + description: '不少于 60 秒,用于电商和内部评审,展示整体体积、正背侧轮廓。', + duration: 60, ratio: '16:9', - promptTemplate: '生成 360 度旋转展示视频:{character}. 白底或浅灰棚拍,镜头稳定,玩具缓慢旋转,展示正面、侧面、背面、顶部细节,材质、表面质感、五官和配件必须严格贴合角色设定。', + promptTemplate: '生成 360 度旋转展示视频:{character}. 白底或浅灰棚拍,镜头稳定,玩具缓慢旋转,展示正面、侧面、背面、顶部细节,材质、表面质感、五官和配件必须严格贴合角色设定。产品尺寸按 45cm 大号智能陪伴毛绒娃娃表现,镜头中要能感知 40cm 以上的抱抱玩偶体量。', }, { id: 'video_unboxing', title: '开箱短片', - description: '用于新品宣发,展示包装到玩具出现的过程。', - duration: 8, + description: '不少于 60 秒,用于新品宣发,展示包装到玩具出现的过程。', + duration: 60, ratio: '9:16', - promptTemplate: '生成玩具开箱短片:{character}. 竖版社媒风格,从礼盒或包装打开到玩具出现,温暖但克制的棚拍光线,突出礼物感、收藏感、角色识别点和配件陈列。', + promptTemplate: '生成玩具开箱短片:{character}. 竖版社媒风格,从礼盒或包装打开到玩具出现,温暖但克制的棚拍光线,突出礼物感、收藏感、角色识别点和配件陈列。玩具为 45cm 左右的大号智能陪伴毛绒娃娃,包装和手部比例必须支持 40cm 以上尺寸。', }, { id: 'video_touch_detail', title: '触感细节', - description: '展示材质、五官、服装纹理和配件细节。', - duration: 6, + description: '不少于 60 秒,展示材质、五官、服装纹理和配件细节。', + duration: 60, ratio: '9:16', - promptTemplate: '生成玩具细节短片:{character}. 近景镜头,展示角色设定中的核心材质、五官或面部表达、服装纹理、手脚细节、标志性配件和包装小物,节奏清楚,避免加入设定外材质或无关部件。', + promptTemplate: '生成玩具细节短片:{character}. 近景镜头,展示角色设定中的核心材质、五官或面部表达、服装纹理、手脚细节、标志性配件和包装小物,节奏清楚,避免加入设定外材质或无关部件。必须体现 45cm 大号毛绒娃娃的厚实体量、柔软回弹和可拥抱尺度。', }, { id: 'video_story_intro', title: '角色故事介绍', - description: '用于 IP 设定和社媒发布。', - duration: 8, + description: '不少于 60 秒,用于 IP 设定和社媒发布。', + duration: 60, ratio: '16:9', - promptTemplate: '生成玩具角色故事介绍视频:{character}. 轻剧情镜头,围绕角色设定的核心使用场景登场,展示表情变化、标志性配件、色彩气质和陪伴感,适合新品发布。', + promptTemplate: '生成玩具角色故事介绍视频:{character}. 轻剧情镜头,围绕角色设定的核心使用场景登场,展示表情变化、标志性配件、色彩气质和陪伴感,适合新品发布。故事中玩具始终是 45cm 左右的大号智能陪伴毛绒娃娃,可以被双手抱住,不要缩成桌面小摆件。', }, { id: 'video_factory_preview', title: '工厂预览短片', - description: '用于打样前内部沟通,展示外观、尺寸、材料、拆件和包装要点。', - duration: 8, + description: '不少于 60 秒,用于打样前内部沟通,展示外观、尺寸、材料、拆件和包装要点。', + duration: 60, ratio: '16:9', - promptTemplate: '生成工厂预览概念短片:{character}. 16:9,面向内部沟通,展示外观、尺寸、材料、拆件和包装要点,镜头清楚克制,不做消费者营销话术。', + promptTemplate: '生成工厂预览概念短片:{character}. 16:9,面向内部沟通,展示外观、尺寸、材料、拆件和包装要点,镜头清楚克制,不做消费者营销话术。尺寸基准写死为成品高度约 45cm,宽约 32cm,侧深约 28cm,背宽约 33cm,必须保持 40cm 以上大尺寸。', }, ] as const; diff --git a/src/lib/types.ts b/src/lib/types.ts index 711a1fd..7cfc797 100644 --- a/src/lib/types.ts +++ b/src/lib/types.ts @@ -299,8 +299,10 @@ export type VideoGenerationRequest = { watermark?: boolean; }; +export type VideoProvider = 'seedance' | 'openai_sora'; + export type VideoGenerationResponse = { - provider: 'seedance'; + provider: VideoProvider; model: string; taskId?: string; status: 'submitted' | 'processing' | 'succeeded' | 'failed'; @@ -315,7 +317,7 @@ export type VideoTask = { description: string; prompt: string; anchorImageUrl?: string; - provider: 'seedance'; + provider: VideoProvider; model: string; taskId?: string; status: VideoGenerationResponse['status']; diff --git a/src/lib/videoProviders.ts b/src/lib/videoProviders.ts index 15bfecb..b890288 100644 --- a/src/lib/videoProviders.ts +++ b/src/lib/videoProviders.ts @@ -1,12 +1,60 @@ -import type { VideoGenerationRequest, VideoGenerationResponse } from './types'; +import type { VideoGenerationRequest, VideoGenerationResponse, VideoProvider } from './types'; +import { readImageUrl } from './storage'; export const SEEDANCE_MODEL = process.env.SEEDANCE_MODEL || 'doubao-seedance-2-0-260128'; const SEEDANCE_API_BASE = process.env.SEEDANCE_API_BASE || 'https://ark.cn-beijing.volces.com/api/v3'; +export const OPENAI_VIDEO_MODEL = process.env.OPENAI_VIDEO_MODEL || 'sora-2-pro'; +const OPENAI_API_BASE = process.env.OPENAI_API_BASE || process.env.GPT_API_BASE || 'https://api.openai.com/v1'; +const MIN_OPENAI_TARGET_SECONDS = Math.max(Number(process.env.OPENAI_VIDEO_MIN_SECONDS || 60), 60); + +type OpenAIVideoRaw = { + id?: string; + model?: string; + status?: string; + video_url?: string; + seconds?: string | number; + size?: string; + progress?: number; + error?: { code?: string; message?: string }; +}; + +export function activeVideoProvider(): VideoProvider { + return process.env.VIDEO_PROVIDER === 'seedance' ? 'seedance' : 'openai_sora'; +} function durationOrDefault(duration?: number): number { return Math.min(Math.max(duration ?? 6, 3), 15); } +function openAITargetDuration(duration?: number): number { + return Math.max(duration ?? MIN_OPENAI_TARGET_SECONDS, MIN_OPENAI_TARGET_SECONDS); +} + +function openAIInitialSeconds(targetDuration: number): '4' | '8' | '12' { + if (targetDuration >= 12) return '12'; + if (targetDuration >= 8) return '8'; + return '4'; +} + +export function openAIVideoSeconds(raw?: unknown): number { + const value = (raw as OpenAIVideoRaw | undefined)?.seconds; + if (typeof value === 'number') return Number.isFinite(value) ? value : 0; + if (typeof value === 'string') { + const parsed = Number(value); + return Number.isFinite(parsed) ? parsed : 0; + } + return 0; +} + +export function openAIExtensionSeconds(currentSeconds: number, targetSeconds: number): '4' | '8' | '12' | '16' | '20' { + const remaining = targetSeconds - currentSeconds; + if (remaining >= 20) return '20'; + if (remaining >= 16) return '16'; + if (remaining >= 12) return '12'; + if (remaining >= 8) return '8'; + return '4'; +} + function normalizeStatus(status?: string): VideoGenerationResponse['status'] { if (status === 'succeeded') return 'succeeded'; if (status === 'failed') return 'failed'; @@ -14,6 +62,13 @@ function normalizeStatus(status?: string): VideoGenerationResponse['status'] { return 'submitted'; } +function normalizeOpenAIStatus(status?: string): VideoGenerationResponse['status'] { + if (status === 'completed') return 'succeeded'; + if (status === 'failed') return 'failed'; + if (status === 'in_progress') return 'processing'; + return 'submitted'; +} + function publicUrlOrUndefined(url?: string): string | undefined { if (!url) return undefined; if (url.startsWith('/')) { @@ -47,6 +102,53 @@ function buildContent(opts: VideoGenerationRequest): Array { + const refs = [...(opts.references ?? [])]; + if (opts.imageUrl) refs.unshift({ type: 'image_url', url: opts.imageUrl, role: 'reference_image' }); + const image = refs.find(ref => ref.type === 'image_url'); + const url = publicUrlOrUndefined(image?.url); + if (url) return { image_url: url }; + if (!image?.url) return undefined; + + const source = await readImageUrl(image.url); + if (source.type.includes('svg')) return undefined; + const dataUrl = `data:${source.type};base64,${source.buf.toString('base64')}`; + return dataUrl.length <= 20 * 1024 * 1024 ? { image_url: dataUrl } : undefined; +} + +function openAIResponse(raw: OpenAIVideoRaw, fallbackTaskId: string | undefined, targetDuration: number): VideoGenerationResponse { + const taskId = raw.id || fallbackTaskId; + return { + provider: 'openai_sora', + model: raw.model || OPENAI_VIDEO_MODEL, + taskId, + status: normalizeOpenAIStatus(raw.status), + videoUrl: raw.video_url, + raw: { + ...raw, + target_seconds: targetDuration, + current_seconds: openAIVideoSeconds(raw), + }, + }; +} + export async function generateSeedanceVideo(opts: VideoGenerationRequest): Promise { const key = process.env.SEEDANCE_API_KEY; if (!key) throw new Error('SEEDANCE_API_KEY missing'); @@ -92,6 +194,123 @@ export async function generateSeedanceVideo(opts: VideoGenerationRequest): Promi }; } +export async function generateOpenAIVideo(opts: VideoGenerationRequest): Promise { + const key = process.env.OPENAI_API_KEY; + if (!key) throw new Error('OPENAI_API_KEY missing'); + if (!opts.prompt?.trim()) throw new Error('prompt required'); + + const targetDuration = openAITargetDuration(opts.duration); + const body: Record = { + model: OPENAI_VIDEO_MODEL, + prompt: withProductVideoConstraints(opts.prompt, targetDuration), + seconds: openAIInitialSeconds(targetDuration), + size: openAISizeForRatio(opts.ratio, opts.resolution), + }; + const inputReference = await openAIInputReference(opts); + if (inputReference) body.input_reference = inputReference; + + let res = await fetch(`${OPENAI_API_BASE}/videos`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${key}`, + }, + body: JSON.stringify(body), + }); + + let rawText = !res.ok ? await res.text() : ''; + if (!res.ok && inputReference && /must match the requested width and height/i.test(rawText)) { + delete body.input_reference; + res = await fetch(`${OPENAI_API_BASE}/videos`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${key}`, + }, + body: JSON.stringify(body), + }); + rawText = !res.ok ? await res.text() : ''; + } + + if (!res.ok) throw new Error(`OpenAI video ${res.status}: ${rawText}`); + const raw = await res.json() as OpenAIVideoRaw; + if (inputReference && !body.input_reference) { + const response = openAIResponse(raw, undefined, targetDuration); + return { ...response, raw: { ...(response.raw as object), input_reference_dropped: 'size_mismatch' } }; + } + return openAIResponse(raw, undefined, targetDuration); +} + +export async function extendOpenAIVideo(opts: { + taskId: string; + prompt: string; + currentSeconds: number; + targetSeconds: number; +}): Promise { + const key = process.env.OPENAI_API_KEY; + if (!key) throw new Error('OPENAI_API_KEY missing'); + if (!opts.taskId) throw new Error('taskId required'); + if (!opts.prompt?.trim()) throw new Error('prompt required'); + + const seconds = openAIExtensionSeconds(opts.currentSeconds, opts.targetSeconds); + const res = await fetch(`${OPENAI_API_BASE}/videos/extensions`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${key}`, + }, + body: JSON.stringify({ + prompt: withProductVideoConstraints(opts.prompt, opts.targetSeconds), + seconds, + video: { id: opts.taskId }, + }), + }); + + if (!res.ok) throw new Error(`OpenAI video extend ${res.status}: ${await res.text()}`); + const raw = await res.json() as OpenAIVideoRaw; + return openAIResponse(raw, opts.taskId, opts.targetSeconds); +} + +export async function getOpenAIVideoTask(taskId: string, targetDuration = MIN_OPENAI_TARGET_SECONDS): Promise { + const key = process.env.OPENAI_API_KEY; + if (!key) throw new Error('OPENAI_API_KEY missing'); + if (!taskId) throw new Error('taskId required'); + + const res = await fetch(`${OPENAI_API_BASE}/videos/${encodeURIComponent(taskId)}`, { + headers: { Authorization: `Bearer ${key}` }, + }); + + if (!res.ok) throw new Error(`OpenAI video ${res.status}: ${await res.text()}`); + const raw = await res.json() as OpenAIVideoRaw; + return openAIResponse(raw, taskId, targetDuration); +} + +export async function downloadOpenAIVideoContent(taskId: string): Promise<{ buffer: Buffer; type: string }> { + const key = process.env.OPENAI_API_KEY; + if (!key) throw new Error('OPENAI_API_KEY missing'); + if (!taskId) throw new Error('taskId required'); + + const res = await fetch(`${OPENAI_API_BASE}/videos/${encodeURIComponent(taskId)}/content`, { + headers: { Authorization: `Bearer ${key}` }, + }); + + if (!res.ok) throw new Error(`OpenAI video content ${res.status}: ${await res.text()}`); + const type = res.headers.get('content-type')?.split(';')[0] || 'video/mp4'; + return { buffer: Buffer.from(await res.arrayBuffer()), type }; +} + +export async function generateVideo(opts: VideoGenerationRequest): Promise { + return activeVideoProvider() === 'seedance' + ? generateSeedanceVideo(opts) + : generateOpenAIVideo(opts); +} + +export async function getVideoTask(taskId: string, provider: VideoProvider, targetDuration?: number): Promise { + return provider === 'seedance' + ? getSeedanceVideoTask(taskId) + : getOpenAIVideoTask(taskId, targetDuration); +} + export async function getSeedanceVideoTask(taskId: string): Promise { const key = process.env.SEEDANCE_API_KEY; if (!key) throw new Error('SEEDANCE_API_KEY missing');