feat: use OpenAI Sora for long videos

This commit is contained in:
2026-05-30 17:04:45 +08:00
parent a10cf6e7fb
commit 9c41cafb1a
11 changed files with 327 additions and 47 deletions

View File

@@ -1,12 +1,60 @@
import type { VideoGenerationRequest, VideoGenerationResponse } from './types';
import type { VideoGenerationRequest, VideoGenerationResponse, VideoProvider } from './types';
import { readImageUrl } from './storage';
/** Seedance model id; override with the SEEDANCE_MODEL environment variable. */
export const SEEDANCE_MODEL = process.env.SEEDANCE_MODEL || 'doubao-seedance-2-0-260128';
/** Base URL for Seedance API requests; override with SEEDANCE_API_BASE. */
const SEEDANCE_API_BASE = process.env.SEEDANCE_API_BASE || 'https://ark.cn-beijing.volces.com/api/v3';
/** OpenAI video model id; override with OPENAI_VIDEO_MODEL. */
export const OPENAI_VIDEO_MODEL = process.env.OPENAI_VIDEO_MODEL || 'sora-2-pro';
/** Base URL for OpenAI API requests; OPENAI_API_BASE takes precedence over GPT_API_BASE. */
const OPENAI_API_BASE = process.env.OPENAI_API_BASE || process.env.GPT_API_BASE || 'https://api.openai.com/v1';

/**
 * Parses the configured minimum target length (in seconds) for OpenAI videos.
 *
 * An unset or empty value yields 60. Values below 60 are raised to 60.
 * A value that does not parse to a finite number (for example `"abc"` or
 * `"Infinity"`) also yields 60; without this guard `Math.max(NaN, 60)` would
 * evaluate to NaN and leak into every duration derived from the constant.
 *
 * @param raw Raw environment variable value.
 * @returns A finite number of seconds, never below 60.
 */
function parseMinOpenAITargetSeconds(raw: string | undefined): number {
  const parsed = Number(raw || 60);
  return Number.isFinite(parsed) ? Math.max(parsed, 60) : 60;
}

/** Lower bound (seconds) for the total length of an OpenAI-generated video; read from OPENAI_VIDEO_MIN_SECONDS. */
const MIN_OPENAI_TARGET_SECONDS = parseMinOpenAITargetSeconds(process.env.OPENAI_VIDEO_MIN_SECONDS);
/**
 * Loose shape of the JSON body this module reads back from the OpenAI video
 * endpoints. Every field is optional because the responses are cast to this
 * type without runtime validation.
 */
type OpenAIVideoRaw = {
// Video/job identifier; used as the task id by openAIResponse.
id?: string;
// Model reported by the API; openAIResponse falls back to OPENAI_VIDEO_MODEL when absent.
model?: string;
// Job status; normalizeOpenAIStatus recognises 'completed', 'failed' and 'in_progress'.
status?: string;
// Copied through to the response's videoUrl when present.
video_url?: string;
// Clip length; may arrive as a string or a number and is parsed by openAIVideoSeconds.
seconds?: string | number;
// Not read by the code visible here — presumably the output resolution; TODO confirm.
size?: string;
// Not read by the code visible here — presumably a completion percentage; TODO confirm.
progress?: number;
// Not read by the code visible here — presumably populated when the job fails; TODO confirm.
error?: { code?: string; message?: string };
};
/**
 * Resolves which video provider is currently selected.
 *
 * @returns `'seedance'` only when the VIDEO_PROVIDER environment variable is
 * exactly `'seedance'`; every other value (including unset) selects
 * `'openai_sora'`.
 */
export function activeVideoProvider(): VideoProvider {
  const configured = process.env.VIDEO_PROVIDER;
  if (configured === 'seedance') {
    return 'seedance';
  }
  return 'openai_sora';
}
/**
 * Clamps a requested clip duration to the range 3–15 seconds.
 *
 * @param duration Requested duration in seconds. When missing or NaN the
 * default of 6 is used; NaN would otherwise pass through both `Math.max` and
 * `Math.min` and be returned unchanged.
 * @returns A duration between 3 and 15 inclusive.
 */
function durationOrDefault(duration?: number): number {
  const requested = duration == null || Number.isNaN(duration) ? 6 : duration;
  return Math.min(Math.max(requested, 3), 15);
}
/**
 * Computes the total length, in seconds, that an OpenAI video should reach.
 *
 * @param duration Requested total duration in seconds. A missing or non-finite
 * value (NaN, ±Infinity) is treated as "not provided": NaN would otherwise be
 * returned as-is by `Math.max`, and an infinite target can never be reached.
 * @returns The requested duration, raised to MIN_OPENAI_TARGET_SECONDS when it
 * is lower or not provided.
 */
function openAITargetDuration(duration?: number): number {
  const requested =
    typeof duration === 'number' && Number.isFinite(duration)
      ? duration
      : MIN_OPENAI_TARGET_SECONDS;
  return Math.max(requested, MIN_OPENAI_TARGET_SECONDS);
}
/**
 * Picks the length of the first generated clip for a given overall target.
 *
 * @param targetDuration Desired total video length in seconds.
 * @returns `'12'` for targets of at least 12 seconds, `'8'` for targets of at
 * least 8 seconds, otherwise `'4'`.
 */
function openAIInitialSeconds(targetDuration: number): '4' | '8' | '12' {
  return targetDuration >= 12 ? '12' : targetDuration >= 8 ? '8' : '4';
}
/**
 * Extracts the `seconds` field from a raw video payload as a number.
 *
 * @param raw Untyped payload; only its `seconds` property is inspected.
 * @returns The numeric value of `seconds` when it is a number or a numeric
 * string that converts to a finite value; `0` in every other case (missing
 * payload, missing field, other types, NaN or infinite values).
 */
export function openAIVideoSeconds(raw?: unknown): number {
  const seconds = (raw as { seconds?: unknown } | null | undefined)?.seconds;
  if (typeof seconds !== 'number' && typeof seconds !== 'string') {
    return 0;
  }
  // Number() is the identity on numbers, so one conversion covers both cases.
  const numeric = Number(seconds);
  return Number.isFinite(numeric) ? numeric : 0;
}
/**
 * Chooses how many seconds to request for the next extension of a video.
 *
 * @param currentSeconds Length of the video so far, in seconds.
 * @param targetSeconds Desired total length, in seconds.
 * @returns The largest of `'20'`, `'16'`, `'12'`, `'8'` that does not exceed
 * the remaining gap; `'4'` when the gap is smaller than 8 seconds (including
 * zero or negative gaps).
 */
export function openAIExtensionSeconds(currentSeconds: number, targetSeconds: number): '4' | '8' | '12' | '16' | '20' {
  const gap = targetSeconds - currentSeconds;
  // Ordered from longest to shortest so the first match is the largest fit.
  const steps = ['20', '16', '12', '8'] as const;
  const fit = steps.find(step => gap >= Number(step));
  return fit ?? '4';
}
function normalizeStatus(status?: string): VideoGenerationResponse['status'] {
if (status === 'succeeded') return 'succeeded';
if (status === 'failed') return 'failed';
@@ -14,6 +62,13 @@ function normalizeStatus(status?: string): VideoGenerationResponse['status'] {
return 'submitted';
}
/**
 * Maps an OpenAI video job status onto this module's status vocabulary.
 *
 * @param status Status string from the OpenAI payload, if any.
 * @returns `'succeeded'` for `'completed'`, `'failed'` for `'failed'`,
 * `'processing'` for `'in_progress'`, and `'submitted'` for anything else
 * (including a missing status).
 */
function normalizeOpenAIStatus(status?: string): VideoGenerationResponse['status'] {
  switch (status) {
    case 'completed':
      return 'succeeded';
    case 'failed':
      return 'failed';
    case 'in_progress':
      return 'processing';
    default:
      return 'submitted';
  }
}
function publicUrlOrUndefined(url?: string): string | undefined {
if (!url) return undefined;
if (url.startsWith('/')) {
@@ -47,6 +102,53 @@ function buildContent(opts: VideoGenerationRequest): Array<Record<string, unknow
return content;
}
/**
 * Chooses the output size string to send to OpenAI.
 *
 * @param ratio Requested aspect ratio; `'9:16'` and `'3:4'` select a portrait
 * size, anything else (including undefined) selects landscape.
 * @param resolution Requested resolution; `'1080p'` selects the larger size.
 * The larger size is also selected whenever the OPENAI_VIDEO_QUALITY
 * environment variable equals `'high'`.
 * @returns One of the four size strings in the return type.
 */
function openAISizeForRatio(ratio?: VideoGenerationRequest['ratio'], resolution?: VideoGenerationRequest['resolution']): '720x1280' | '1280x720' | '1024x1792' | '1792x1024' {
  const isPortrait = ratio === '9:16' || ratio === '3:4';
  const wantsLarge = resolution === '1080p' || process.env.OPENAI_VIDEO_QUALITY === 'high';
  if (wantsLarge) {
    return isPortrait ? '1024x1792' : '1792x1024';
  }
  return isPortrait ? '720x1280' : '1280x720';
}
/**
 * Appends the fixed product constraints to a user prompt.
 *
 * The result is the trimmed prompt, a blank line, and three constraint lines
 * (character/size, appearance, and minimum total duration), joined with
 * newlines.
 *
 * @param prompt User-supplied prompt; surrounding whitespace is trimmed.
 * @param targetDuration Total duration in seconds interpolated into the last line.
 * @returns The combined prompt text.
 */
function withProductVideoConstraints(prompt: string, targetDuration: number): string {
  const userPrompt = prompt.trim();
  const characterRule = '硬性约束:主角必须是“有你家族 · 糯糯猪”智能陪伴毛绒娃娃,整体成品高度约 45cm必须明显是 40cm 以上的大尺寸抱抱玩偶。';
  const appearanceRule = '保持浅粉毛绒、圆胖坐姿、黑亮眼睛、粉色猪鼻、下垂耳朵、金色挂绳和爱心吊牌;不要改成普通小挂件、钥匙扣或低于 40cm 的小公仔。';
  const durationRule = `目标视频总时长不少于 ${targetDuration} 秒;如果 API 需要分段或延展,保持同一角色、同一尺寸比例和连续镜头语言。`;
  // Two newlines after the prompt leave one empty line before the constraints.
  return `${userPrompt}\n\n${characterRule}\n${appearanceRule}\n${durationRule}`;
}
/**
 * Builds the optional image reference sent with an OpenAI video request.
 *
 * The image is `opts.imageUrl` when set, otherwise the first entry of
 * `opts.references` whose type is `'image_url'`. If `publicUrlOrUndefined`
 * accepts the URL it is used directly; otherwise the image is loaded through
 * `readImageUrl` and inlined as a base64 data URL.
 *
 * @param opts Generation request carrying `imageUrl` and/or `references`.
 * @returns `{ image_url }` or `undefined` when there is no image, the image is
 * an SVG, or the data URL would exceed 20 * 1024 * 1024 characters.
 */
async function openAIInputReference(opts: VideoGenerationRequest): Promise<{ image_url: string } | undefined> {
  // An explicit imageUrl always wins over entries in the references list.
  const candidateUrl = opts.imageUrl || opts.references?.find(ref => ref.type === 'image_url')?.url;
  if (!candidateUrl) {
    return undefined;
  }

  const publicUrl = publicUrlOrUndefined(candidateUrl);
  if (publicUrl) {
    return { image_url: publicUrl };
  }

  const image = await readImageUrl(candidateUrl);
  if (image.type.includes('svg')) {
    return undefined;
  }

  const encoded = image.buf.toString('base64');
  const inlineUrl = `data:${image.type};base64,${encoded}`;
  const maxLength = 20 * 1024 * 1024;
  if (inlineUrl.length > maxLength) {
    return undefined;
  }
  return { image_url: inlineUrl };
}
/**
 * Converts a raw OpenAI video payload into the module's response shape.
 *
 * @param raw Payload returned by an OpenAI video endpoint.
 * @param fallbackTaskId Task id to report when the payload has no `id`.
 * @param targetDuration Target total length in seconds, echoed into `raw.target_seconds`.
 * @returns A response with provider `'openai_sora'`, the normalized status,
 * and the original payload extended with `target_seconds` and `current_seconds`.
 */
function openAIResponse(raw: OpenAIVideoRaw, fallbackTaskId: string | undefined, targetDuration: number): VideoGenerationResponse {
  // Keep the provider payload intact and add the bookkeeping fields after it.
  const annotatedRaw = {
    ...raw,
    target_seconds: targetDuration,
    current_seconds: openAIVideoSeconds(raw),
  };
  return {
    provider: 'openai_sora',
    model: raw.model || OPENAI_VIDEO_MODEL,
    taskId: raw.id || fallbackTaskId,
    status: normalizeOpenAIStatus(raw.status),
    videoUrl: raw.video_url,
    raw: annotatedRaw,
  };
}
export async function generateSeedanceVideo(opts: VideoGenerationRequest): Promise<VideoGenerationResponse> {
const key = process.env.SEEDANCE_API_KEY;
if (!key) throw new Error('SEEDANCE_API_KEY missing');
@@ -92,6 +194,123 @@ export async function generateSeedanceVideo(opts: VideoGenerationRequest): Promi
};
}
/**
 * Starts an OpenAI video generation job.
 *
 * The request is POSTed to `${OPENAI_API_BASE}/videos`. If an image reference
 * was attached and the API rejects it with a "must match the requested width
 * and height" error, the request is retried once without the reference and
 * the returned `raw` object is tagged with `input_reference_dropped: 'size_mismatch'`.
 *
 * @param opts Generation request; `prompt` is required.
 * @returns The normalized response for the created job.
 * @throws Error when OPENAI_API_KEY is unset, the prompt is blank, or the API
 * responds with a non-OK status.
 */
export async function generateOpenAIVideo(opts: VideoGenerationRequest): Promise<VideoGenerationResponse> {
  const apiKey = process.env.OPENAI_API_KEY;
  if (!apiKey) throw new Error('OPENAI_API_KEY missing');
  if (!opts.prompt?.trim()) throw new Error('prompt required');

  const targetDuration = openAITargetDuration(opts.duration);
  const payload: Record<string, unknown> = {
    model: OPENAI_VIDEO_MODEL,
    prompt: withProductVideoConstraints(opts.prompt, targetDuration),
    seconds: openAIInitialSeconds(targetDuration),
    size: openAISizeForRatio(opts.ratio, opts.resolution),
  };
  const inputReference = await openAIInputReference(opts);
  if (inputReference) payload.input_reference = inputReference;

  // Serializes the payload at call time, so a retry sees any removed field.
  const submit = () =>
    fetch(`${OPENAI_API_BASE}/videos`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${apiKey}`,
      },
      body: JSON.stringify(payload),
    });

  let res = await submit();
  let errorText = res.ok ? '' : await res.text();
  let referenceDropped = false;

  const sizeMismatch = /must match the requested width and height/i;
  if (!res.ok && inputReference && sizeMismatch.test(errorText)) {
    // The reference image does not fit the requested size: retry without it.
    delete payload.input_reference;
    referenceDropped = true;
    res = await submit();
    errorText = res.ok ? '' : await res.text();
  }

  if (!res.ok) throw new Error(`OpenAI video ${res.status}: ${errorText}`);

  const raw = await res.json() as OpenAIVideoRaw;
  const response = openAIResponse(raw, undefined, targetDuration);
  if (!referenceDropped) {
    return response;
  }
  return { ...response, raw: { ...(response.raw as object), input_reference_dropped: 'size_mismatch' } };
}
/**
 * Requests an extension of an existing OpenAI video.
 *
 * POSTs to `${OPENAI_API_BASE}/videos/extensions` with the constrained prompt,
 * the extension length chosen by `openAIExtensionSeconds`, and the id of the
 * video to extend.
 *
 * @param opts.taskId Id of the video to extend; also used as the fallback task id.
 * @param opts.prompt Prompt for the extension; must not be blank.
 * @param opts.currentSeconds Current length of the video in seconds.
 * @param opts.targetSeconds Desired total length in seconds.
 * @returns The normalized response for the extension job.
 * @throws Error when OPENAI_API_KEY is unset, `taskId` or `prompt` is missing,
 * or the API responds with a non-OK status.
 */
export async function extendOpenAIVideo(opts: {
  taskId: string;
  prompt: string;
  currentSeconds: number;
  targetSeconds: number;
}): Promise<VideoGenerationResponse> {
  const apiKey = process.env.OPENAI_API_KEY;
  if (!apiKey) throw new Error('OPENAI_API_KEY missing');

  const { taskId, prompt, currentSeconds, targetSeconds } = opts;
  if (!taskId) throw new Error('taskId required');
  if (!prompt?.trim()) throw new Error('prompt required');

  const payload = {
    prompt: withProductVideoConstraints(prompt, targetSeconds),
    seconds: openAIExtensionSeconds(currentSeconds, targetSeconds),
    video: { id: taskId },
  };
  const res = await fetch(`${OPENAI_API_BASE}/videos/extensions`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      Authorization: `Bearer ${apiKey}`,
    },
    body: JSON.stringify(payload),
  });
  if (!res.ok) {
    throw new Error(`OpenAI video extend ${res.status}: ${await res.text()}`);
  }

  const raw = await res.json() as OpenAIVideoRaw;
  return openAIResponse(raw, taskId, targetSeconds);
}
/**
 * Fetches the current state of an OpenAI video job.
 *
 * @param taskId Id of the video job; URL-encoded into the request path.
 * @param targetDuration Target total length in seconds to echo into the
 * response; defaults to MIN_OPENAI_TARGET_SECONDS.
 * @returns The normalized response for the job.
 * @throws Error when OPENAI_API_KEY is unset, `taskId` is empty, or the API
 * responds with a non-OK status.
 */
export async function getOpenAIVideoTask(taskId: string, targetDuration = MIN_OPENAI_TARGET_SECONDS): Promise<VideoGenerationResponse> {
  const apiKey = process.env.OPENAI_API_KEY;
  if (!apiKey) throw new Error('OPENAI_API_KEY missing');
  if (!taskId) throw new Error('taskId required');

  const endpoint = `${OPENAI_API_BASE}/videos/${encodeURIComponent(taskId)}`;
  const res = await fetch(endpoint, {
    headers: { Authorization: `Bearer ${apiKey}` },
  });
  if (!res.ok) {
    throw new Error(`OpenAI video ${res.status}: ${await res.text()}`);
  }

  const raw = await res.json() as OpenAIVideoRaw;
  return openAIResponse(raw, taskId, targetDuration);
}
/**
 * Downloads the binary content of an OpenAI video.
 *
 * @param taskId Id of the video; URL-encoded into the request path.
 * @returns The response body as a Buffer plus its media type, taken from the
 * part of the `content-type` header before the first `;` and defaulting to
 * `'video/mp4'` when the header is missing or empty.
 * @throws Error when OPENAI_API_KEY is unset, `taskId` is empty, or the API
 * responds with a non-OK status.
 */
export async function downloadOpenAIVideoContent(taskId: string): Promise<{ buffer: Buffer; type: string }> {
  const apiKey = process.env.OPENAI_API_KEY;
  if (!apiKey) throw new Error('OPENAI_API_KEY missing');
  if (!taskId) throw new Error('taskId required');

  const endpoint = `${OPENAI_API_BASE}/videos/${encodeURIComponent(taskId)}/content`;
  const res = await fetch(endpoint, {
    headers: { Authorization: `Bearer ${apiKey}` },
  });
  if (!res.ok) {
    throw new Error(`OpenAI video content ${res.status}: ${await res.text()}`);
  }

  // Drop any parameters (e.g. charset) that follow the media type.
  const contentType = res.headers.get('content-type');
  const mediaType = contentType ? contentType.split(';')[0] : '';
  const type = mediaType || 'video/mp4';

  const bytes = await res.arrayBuffer();
  return { buffer: Buffer.from(bytes), type };
}
/**
 * Starts a video generation job with whichever provider is active.
 *
 * @param opts Generation request forwarded unchanged to the provider.
 * @returns The response from `generateSeedanceVideo` when the active provider
 * is `'seedance'`, otherwise from `generateOpenAIVideo`.
 */
export async function generateVideo(opts: VideoGenerationRequest): Promise<VideoGenerationResponse> {
  if (activeVideoProvider() === 'seedance') {
    return generateSeedanceVideo(opts);
  }
  return generateOpenAIVideo(opts);
}
/**
 * Looks up a video job with the provider that created it.
 *
 * @param taskId Id of the job to look up.
 * @param provider Provider that owns the job.
 * @param targetDuration Target total length in seconds; only forwarded to the
 * OpenAI lookup.
 * @returns The response from `getSeedanceVideoTask` for `'seedance'`,
 * otherwise from `getOpenAIVideoTask`.
 */
export async function getVideoTask(taskId: string, provider: VideoProvider, targetDuration?: number): Promise<VideoGenerationResponse> {
  if (provider === 'seedance') {
    return getSeedanceVideoTask(taskId);
  }
  return getOpenAIVideoTask(taskId, targetDuration);
}
export async function getSeedanceVideoTask(taskId: string): Promise<VideoGenerationResponse> {
const key = process.env.SEEDANCE_API_KEY;
if (!key) throw new Error('SEEDANCE_API_KEY missing');