Files
ai-toy-patent-workflow/src/lib/videoProviders.ts

349 lines
14 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import type { VideoGenerationRequest, VideoGenerationResponse, VideoProvider } from './types';
import { readImageUrl } from './storage';
/** Seedance model identifier; overridable through the SEEDANCE_MODEL environment variable. */
export const SEEDANCE_MODEL = process.env.SEEDANCE_MODEL || 'doubao-seedance-2-0-260128';
/** Base URL for Seedance HTTP calls; overridable through SEEDANCE_API_BASE. */
const SEEDANCE_API_BASE = process.env.SEEDANCE_API_BASE || 'https://ark.cn-beijing.volces.com/api/v3';
/** OpenAI video model identifier; overridable through OPENAI_VIDEO_MODEL. */
export const OPENAI_VIDEO_MODEL = process.env.OPENAI_VIDEO_MODEL || 'sora-2-pro';
/** Base URL for OpenAI HTTP calls; OPENAI_API_BASE wins over GPT_API_BASE. */
const OPENAI_API_BASE = process.env.OPENAI_API_BASE || process.env.GPT_API_BASE || 'https://api.openai.com/v1';

/** Hard lower bound (in seconds) for the OpenAI target duration. */
const OPENAI_MIN_SECONDS_FLOOR = 60;

/**
 * Parses the configured minimum target duration.
 *
 * @param raw - Raw value of OPENAI_VIDEO_MIN_SECONDS (may be unset or empty).
 * @returns The parsed value, never below the floor. Non-numeric or non-finite
 *   input falls back to the floor; without this guard `Math.max(NaN, 60)` would
 *   evaluate to NaN and leak into every duration derived from this constant.
 */
function resolveMinOpenAISeconds(raw: string | undefined): number {
  const parsed = Number(raw || OPENAI_MIN_SECONDS_FLOOR);
  if (!Number.isFinite(parsed)) return OPENAI_MIN_SECONDS_FLOOR;
  return Math.max(parsed, OPENAI_MIN_SECONDS_FLOOR);
}

/** Minimum total duration (seconds) targeted for OpenAI videos; always finite and >= 60. */
const MIN_OPENAI_TARGET_SECONDS = resolveMinOpenAISeconds(process.env.OPENAI_VIDEO_MIN_SECONDS);
/**
 * Loose shape of the JSON object read back from the OpenAI video endpoints.
 * Every field is optional because the payload is only cast, never validated.
 */
type OpenAIVideoRaw = {
  // Identity
  id?: string;
  model?: string;

  // Lifecycle
  status?: string;
  progress?: number;
  error?: { code?: string; message?: string };

  // Output description; `seconds` may arrive as a number or a numeric string.
  seconds?: string | number;
  size?: string;
  video_url?: string;
};
/**
 * Selects the video backend from the VIDEO_PROVIDER environment variable.
 *
 * @returns 'seedance' only when VIDEO_PROVIDER is exactly 'seedance';
 *   any other value (or no value) selects 'openai_sora'.
 */
export function activeVideoProvider(): VideoProvider {
  if (process.env.VIDEO_PROVIDER === 'seedance') {
    return 'seedance';
  }
  return 'openai_sora';
}
/**
 * Clamps a requested duration into the 3..15 range.
 *
 * @param duration - Requested seconds; defaults to 15 when omitted.
 * @returns The duration limited to at least 3 and at most 15.
 */
function durationOrDefault(duration?: number): number {
  const requested = duration ?? 15;
  if (requested < 3) return 3;
  if (requested > 15) return 15;
  return requested;
}
/**
 * Resolves the total duration an OpenAI video should reach.
 *
 * @param duration - Requested seconds; the configured minimum is used when omitted.
 * @returns The larger of the request and MIN_OPENAI_TARGET_SECONDS.
 */
function openAITargetDuration(duration?: number): number {
  const floor = MIN_OPENAI_TARGET_SECONDS;
  const requested = duration ?? floor;
  return Math.max(floor, requested);
}
/**
 * Picks the clip length for the first OpenAI generation request.
 *
 * @param targetDuration - Total seconds the finished video should reach.
 * @returns '12' for targets of 12 or more, '8' for 8 or more, otherwise '4'.
 */
function openAIInitialSeconds(targetDuration: number): '4' | '8' | '12' {
  return targetDuration >= 12 ? '12' : targetDuration >= 8 ? '8' : '4';
}
/**
 * Reads the `seconds` field from a raw OpenAI video payload.
 *
 * @param raw - Untyped payload; anything without a usable `seconds` is tolerated.
 * @returns The finite numeric value of `seconds` (numbers and numeric strings
 *   are accepted), or 0 when it is missing, of another type, or not finite.
 */
export function openAIVideoSeconds(raw?: unknown): number {
  const seconds = (raw as { seconds?: unknown } | null | undefined)?.seconds;
  if (typeof seconds !== 'number' && typeof seconds !== 'string') return 0;
  const numeric = typeof seconds === 'number' ? seconds : Number(seconds);
  return Number.isFinite(numeric) ? numeric : 0;
}
/**
 * Chooses the length of the next extension segment.
 *
 * @param currentSeconds - Seconds already generated.
 * @param targetSeconds - Seconds the finished video should reach.
 * @returns The largest of 20/16/12/8 that does not exceed the remaining gap,
 *   or '4' when the gap is smaller than 8 (including zero or negative gaps).
 */
export function openAIExtensionSeconds(currentSeconds: number, targetSeconds: number): '4' | '8' | '12' | '16' | '20' {
  const gap = targetSeconds - currentSeconds;
  const tiers: Array<[number, '8' | '12' | '16' | '20']> = [
    [20, '20'],
    [16, '16'],
    [12, '12'],
    [8, '8'],
  ];
  for (const [threshold, label] of tiers) {
    if (gap >= threshold) return label;
  }
  return '4';
}
/**
 * Maps a Seedance task status string onto the shared response status.
 *
 * @param status - Status string from the provider payload, if any.
 * @returns 'succeeded' and 'failed' pass through; 'processing' and 'running'
 *   become 'processing'; everything else (including undefined) is 'submitted'.
 */
function normalizeStatus(status?: string): VideoGenerationResponse['status'] {
  switch (status) {
    case 'succeeded':
    case 'failed':
      return status;
    case 'processing':
    case 'running':
      return 'processing';
    default:
      return 'submitted';
  }
}
/**
 * Maps an OpenAI video status string onto the shared response status.
 *
 * @param status - Status string from the provider payload, if any.
 * @returns 'completed' becomes 'succeeded', 'failed' stays 'failed',
 *   'in_progress' becomes 'processing'; anything else is 'submitted'.
 */
function normalizeOpenAIStatus(status?: string): VideoGenerationResponse['status'] {
  switch (status) {
    case 'completed':
      return 'succeeded';
    case 'failed':
      return 'failed';
    case 'in_progress':
      return 'processing';
    default:
      return 'submitted';
  }
}
/** Matches http(s) URLs whose host starts with a loopback name or address. */
const LOOPBACK_URL_PATTERN = /^https?:\/\/(localhost|127\.0\.0\.1|\[::1\])/i;

/**
 * Returns an absolute http(s) URL that a remote provider could fetch, or
 * undefined when no such URL can be produced.
 *
 * - Absolute inputs must be http(s) and must not point at a loopback host.
 * - Inputs starting with '/' are resolved against PUBLIC_APP_URL (falling back
 *   to NEXT_PUBLIC_APP_URL); a missing, loopback or malformed base yields
 *   undefined instead of throwing.
 *
 * @param url - Candidate URL or app-relative path.
 * @returns The usable absolute URL, or undefined.
 */
function publicUrlOrUndefined(url?: string): string | undefined {
  if (!url) return undefined;

  if (!url.startsWith('/')) {
    if (!/^https?:\/\//i.test(url)) return undefined;
    return LOOPBACK_URL_PATTERN.test(url) ? undefined : url;
  }

  const base = process.env.PUBLIC_APP_URL || process.env.NEXT_PUBLIC_APP_URL;
  if (!base || LOOPBACK_URL_PATTERN.test(base)) return undefined;

  let resolved: string;
  try {
    resolved = new URL(url, base).toString();
  } catch {
    // A base without a scheme (or otherwise unparsable) makes `new URL` throw;
    // treat that as "no public URL available" rather than failing the caller.
    return undefined;
  }

  // A protocol-relative input such as '//localhost/x' replaces the base host,
  // so the resolved URL has to pass the same checks as an absolute input.
  if (!/^https?:\/\//i.test(resolved)) return undefined;
  return LOOPBACK_URL_PATTERN.test(resolved) ? undefined : resolved;
}
/**
 * Builds the `content` array sent to Seedance: the trimmed prompt as a text
 * item, followed by one item per reference that has a publicly reachable URL.
 *
 * `opts.imageUrl`, when present, is treated as the first image reference.
 * References whose URL is not public, or whose type is not image/video/audio,
 * are skipped.
 *
 * @param opts - Generation request supplying prompt, imageUrl and references.
 * @returns The ordered content items.
 */
function buildContent(opts: VideoGenerationRequest): Array<Record<string, unknown>> {
  const items: Array<Record<string, unknown>> = [{ type: 'text', text: opts.prompt.trim() }];

  const references = [...(opts.references ?? [])];
  if (opts.imageUrl) {
    references.unshift({ type: 'image_url', url: opts.imageUrl, role: 'reference_image' });
  }

  for (const reference of references) {
    const url = publicUrlOrUndefined(reference.url);
    if (!url) continue;

    switch (reference.type) {
      case 'image_url':
        items.push({ type: 'image_url', image_url: { url }, role: reference.role ?? 'reference_image' });
        break;
      case 'video_url':
        items.push({ type: 'video_url', video_url: { url }, role: reference.role ?? 'reference_video' });
        break;
      case 'audio_url':
        items.push({ type: 'audio_url', audio_url: { url }, role: reference.role ?? 'reference_audio' });
        break;
      default:
        break;
    }
  }

  return items;
}
/**
 * Chooses the OpenAI `size` value for a requested aspect ratio and resolution.
 *
 * @param ratio - '9:16' and '3:4' select a portrait size; anything else is landscape.
 * @param resolution - '1080p' selects the larger size; so does setting the
 *   OPENAI_VIDEO_QUALITY environment variable to 'high'.
 * @returns One of the four supported size strings.
 */
function openAISizeForRatio(ratio?: VideoGenerationRequest['ratio'], resolution?: VideoGenerationRequest['resolution']): '720x1280' | '1280x720' | '1024x1792' | '1792x1024' {
  const isPortrait = ratio === '3:4' || ratio === '9:16';
  const wantsHigh = process.env.OPENAI_VIDEO_QUALITY === 'high' || resolution === '1080p';
  if (wantsHigh) {
    return isPortrait ? '1024x1792' : '1792x1024';
  }
  return isPortrait ? '720x1280' : '1280x720';
}
/**
 * Appends the fixed product-consistency constraints to a prompt.
 *
 * The result is the trimmed prompt, a blank line, and then a block of
 * Chinese-language instructions covering the locked character design, how its
 * physical size should be shown, allowed materials, forbidden elements and
 * on-screen text, ending with a line that states the target total duration.
 * Lines are joined with '\n'.
 *
 * @param prompt - Caller-supplied prompt; surrounding whitespace is removed.
 * @param targetDuration - Seconds interpolated into the final constraint line.
 * @returns The combined prompt text.
 */
function withProductVideoConstraints(prompt: string, targetDuration: number): string {
return [
prompt.trim(),
'',
'硬性一致性约束:主角必须严格服从当前锁定角色和参考图。当前项目主角是“有你家族 · 亥猪”40cm+ AI 陪伴机甲摆件,不是传统卡通猪。',
'必须保留白色圆润头盔、橙色弧形 visor 情绪屏、深灰面部底层、头顶 MEEY 竖条、胸前 M 徽章、斜挎黑色能量肩带、灰橙功能扣、侧面圆形模块、短胖低重心站立比例。',
'尺寸表现按 40cm+ 实体产品处理:正面宽约 28cm侧面深约 22cm可以用成人双手、包装盒、桌面或展台帮助体现体量不能缩成掌心小玩偶、钥匙扣、迷你挂件或低于 40cm 的小公仔。',
'外部材质可以是亲肤短绒、软壳或软硅胶复合质感,但不能改变基础机甲轮廓、面罩形状、肩带路径、徽章位置和核心配件关系。',
'禁止生成猪鼻子、猪尾巴、写实猪耳、猪蹄、四足动物身体、其它生肖动物本体、武器和攻击性重装机甲。',
'如果参考图有中文或数字,只把它们当作比例依据;成片中不要生成任何数字、厘米文字、箭头尺寸标注或文字海报,避免出现错误读数。',
// The duration is a prompt hint only; it does not set any API parameter here.
`目标视频总时长不少于 ${targetDuration} 秒;如果 API 需要分段或延展,保持同一角色、同一尺寸比例和连续镜头语言。`,
].join('\n');
}
/**
 * Resolves the single image reference to attach to an OpenAI video request.
 *
 * The first image reference is used (`opts.imageUrl` takes precedence over
 * `opts.references`). A publicly reachable URL is passed through as-is;
 * otherwise the image is loaded via `readImageUrl` and inlined as a base64
 * data URL, unless it is an SVG or the data URL exceeds 20 MiB characters.
 *
 * @param opts - Generation request supplying imageUrl and references.
 * @returns `{ image_url }` or undefined when no usable image exists.
 */
async function openAIInputReference(opts: VideoGenerationRequest): Promise<{ image_url: string } | undefined> {
  const candidates = [...(opts.references ?? [])];
  if (opts.imageUrl) {
    candidates.unshift({ type: 'image_url', url: opts.imageUrl, role: 'reference_image' });
  }

  const firstImage = candidates.find(candidate => candidate.type === 'image_url');
  const publicUrl = publicUrlOrUndefined(firstImage?.url);
  if (publicUrl) return { image_url: publicUrl };
  if (!firstImage?.url) return undefined;

  // Not publicly reachable: fall back to embedding the bytes in the request.
  const loaded = await readImageUrl(firstImage.url);
  if (loaded.type.includes('svg')) return undefined;

  const maxDataUrlLength = 20 * 1024 * 1024;
  const encoded = `data:${loaded.type};base64,${loaded.buf.toString('base64')}`;
  if (encoded.length > maxDataUrlLength) return undefined;
  return { image_url: encoded };
}
/**
 * Converts a raw OpenAI video payload into the shared response shape.
 *
 * @param raw - Payload returned by an OpenAI video endpoint.
 * @param fallbackTaskId - Task id to use when the payload carries no `id`.
 * @param targetDuration - Target seconds, echoed back as `raw.target_seconds`.
 * @returns The normalized response; `raw` is the payload plus
 *   `target_seconds` and the parsed `current_seconds`.
 */
function openAIResponse(raw: OpenAIVideoRaw, fallbackTaskId: string | undefined, targetDuration: number): VideoGenerationResponse {
  const currentSeconds = openAIVideoSeconds(raw);
  const annotatedRaw = {
    ...raw,
    target_seconds: targetDuration,
    current_seconds: currentSeconds,
  };

  return {
    provider: 'openai_sora',
    model: raw.model || OPENAI_VIDEO_MODEL,
    taskId: raw.id || fallbackTaskId,
    status: normalizeOpenAIStatus(raw.status),
    videoUrl: raw.video_url,
    raw: annotatedRaw,
  };
}
/**
 * Submits a video generation task to Seedance.
 *
 * The prompt is extended with the product constraints (quoting a target of at
 * least 60 seconds), while the `duration` actually requested is clamped by
 * `durationOrDefault`.
 *
 * @param opts - Generation request.
 * @returns The normalized response for the created task.
 * @throws Error when SEEDANCE_API_KEY is unset, the prompt is blank, or the
 *   HTTP response is not OK (message includes status and response body).
 */
export async function generateSeedanceVideo(opts: VideoGenerationRequest): Promise<VideoGenerationResponse> {
  const apiKey = process.env.SEEDANCE_API_KEY;
  if (!apiKey) throw new Error('SEEDANCE_API_KEY missing');
  if (!opts.prompt?.trim()) throw new Error('prompt required');

  const promptTargetSeconds = Math.max(opts.duration ?? 60, 60);
  const constrainedPrompt = withProductVideoConstraints(opts.prompt, promptTargetSeconds);
  const content = buildContent({ ...opts, prompt: constrainedPrompt });

  const payload: Record<string, unknown> = {
    model: SEEDANCE_MODEL,
    content,
    generate_audio: opts.generateAudio ?? true,
    ratio: opts.ratio || '16:9',
    duration: durationOrDefault(opts.duration),
    watermark: opts.watermark ?? false,
    // Only sent when the caller asked for a specific resolution.
    ...(opts.resolution ? { resolution: opts.resolution } : {}),
  };

  const response = await fetch(`${SEEDANCE_API_BASE}/contents/generations/tasks`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      Authorization: `Bearer ${apiKey}`,
    },
    body: JSON.stringify(payload),
  });
  if (!response.ok) {
    const detail = await response.text();
    throw new Error(`Seedance ${response.status}: ${detail}`);
  }

  const raw = await response.json() as {
    id?: string;
    task_id?: string;
    status?: string;
    video_url?: string;
    output?: { video_url?: string; url?: string };
    content?: { video_url?: string; url?: string };
  };
  const { output, content: resultContent } = raw;

  return {
    provider: 'seedance',
    model: SEEDANCE_MODEL,
    taskId: raw.task_id || raw.id,
    status: normalizeStatus(raw.status),
    // The video URL may appear at the top level, under `output`, or under `content`.
    videoUrl: raw.video_url || output?.video_url || output?.url || resultContent?.video_url || resultContent?.url,
    raw,
  };
}
/**
 * Submits the initial OpenAI video generation request.
 *
 * If an input reference image was attached and the API rejects it because its
 * dimensions do not match the requested size, the request is retried once
 * without the reference and the response is tagged with
 * `raw.input_reference_dropped = 'size_mismatch'`.
 *
 * @param opts - Generation request.
 * @returns The normalized response for the created video.
 * @throws Error when OPENAI_API_KEY is unset, the prompt is blank, or the
 *   final HTTP response is not OK (message includes status and response body).
 */
export async function generateOpenAIVideo(opts: VideoGenerationRequest): Promise<VideoGenerationResponse> {
  const apiKey = process.env.OPENAI_API_KEY;
  if (!apiKey) throw new Error('OPENAI_API_KEY missing');
  if (!opts.prompt?.trim()) throw new Error('prompt required');

  const targetSeconds = openAITargetDuration(opts.duration);
  const payload: Record<string, unknown> = {
    model: OPENAI_VIDEO_MODEL,
    prompt: withProductVideoConstraints(opts.prompt, targetSeconds),
    seconds: openAIInitialSeconds(targetSeconds),
    size: openAISizeForRatio(opts.ratio, opts.resolution),
  };
  const reference = await openAIInputReference(opts);
  if (reference) payload.input_reference = reference;

  // Serializes `payload` at call time, so the retry picks up the removed reference.
  const submit = () => fetch(`${OPENAI_API_BASE}/videos`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      Authorization: `Bearer ${apiKey}`,
    },
    body: JSON.stringify(payload),
  });

  let response = await submit();
  let failureText = response.ok ? '' : await response.text();
  let referenceDropped = false;

  if (!response.ok && reference && /must match the requested width and height/i.test(failureText)) {
    delete payload.input_reference;
    referenceDropped = true;
    response = await submit();
    failureText = response.ok ? '' : await response.text();
  }
  if (!response.ok) throw new Error(`OpenAI video ${response.status}: ${failureText}`);

  const raw = await response.json() as OpenAIVideoRaw;
  const result = openAIResponse(raw, undefined, targetSeconds);
  if (!referenceDropped) return result;
  return { ...result, raw: { ...(result.raw as object), input_reference_dropped: 'size_mismatch' } };
}
/**
 * Requests an extension of an existing OpenAI video.
 *
 * The segment length is derived from the gap between `currentSeconds` and
 * `targetSeconds`; the prompt is extended with the product constraints.
 *
 * @param opts - Source video id, prompt, seconds generated so far and target seconds.
 * @returns The normalized response; `taskId` falls back to `opts.taskId`.
 * @throws Error when OPENAI_API_KEY is unset, taskId is empty, the prompt is
 *   blank, or the HTTP response is not OK.
 */
export async function extendOpenAIVideo(opts: {
  taskId: string;
  prompt: string;
  currentSeconds: number;
  targetSeconds: number;
}): Promise<VideoGenerationResponse> {
  const apiKey = process.env.OPENAI_API_KEY;
  if (!apiKey) throw new Error('OPENAI_API_KEY missing');
  if (!opts.taskId) throw new Error('taskId required');
  if (!opts.prompt?.trim()) throw new Error('prompt required');

  const { taskId, targetSeconds } = opts;
  const payload = {
    prompt: withProductVideoConstraints(opts.prompt, targetSeconds),
    seconds: openAIExtensionSeconds(opts.currentSeconds, targetSeconds),
    video: { id: taskId },
  };

  const response = await fetch(`${OPENAI_API_BASE}/videos/extensions`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      Authorization: `Bearer ${apiKey}`,
    },
    body: JSON.stringify(payload),
  });
  if (!response.ok) {
    const detail = await response.text();
    throw new Error(`OpenAI video extend ${response.status}: ${detail}`);
  }

  const raw = await response.json() as OpenAIVideoRaw;
  return openAIResponse(raw, taskId, targetSeconds);
}
/**
 * Fetches the current state of an OpenAI video.
 *
 * @param taskId - Video id; URL-encoded into the request path.
 * @param targetDuration - Target seconds echoed into the response `raw`;
 *   defaults to the configured minimum.
 * @returns The normalized response; `taskId` falls back to the argument.
 * @throws Error when OPENAI_API_KEY is unset, taskId is empty, or the HTTP
 *   response is not OK.
 */
export async function getOpenAIVideoTask(taskId: string, targetDuration = MIN_OPENAI_TARGET_SECONDS): Promise<VideoGenerationResponse> {
  const apiKey = process.env.OPENAI_API_KEY;
  if (!apiKey) throw new Error('OPENAI_API_KEY missing');
  if (!taskId) throw new Error('taskId required');

  const endpoint = `${OPENAI_API_BASE}/videos/${encodeURIComponent(taskId)}`;
  const response = await fetch(endpoint, {
    headers: { Authorization: `Bearer ${apiKey}` },
  });
  if (!response.ok) {
    const detail = await response.text();
    throw new Error(`OpenAI video ${response.status}: ${detail}`);
  }

  const raw = await response.json() as OpenAIVideoRaw;
  return openAIResponse(raw, taskId, targetDuration);
}
/**
 * Downloads the binary content of an OpenAI video.
 *
 * @param taskId - Video id; URL-encoded into the request path.
 * @returns The bytes plus the media type taken from the Content-Type header
 *   (parameters after ';' removed), defaulting to 'video/mp4'.
 * @throws Error when OPENAI_API_KEY is unset, taskId is empty, or the HTTP
 *   response is not OK.
 */
export async function downloadOpenAIVideoContent(taskId: string): Promise<{ buffer: Buffer; type: string }> {
  const apiKey = process.env.OPENAI_API_KEY;
  if (!apiKey) throw new Error('OPENAI_API_KEY missing');
  if (!taskId) throw new Error('taskId required');

  const endpoint = `${OPENAI_API_BASE}/videos/${encodeURIComponent(taskId)}/content`;
  const response = await fetch(endpoint, {
    headers: { Authorization: `Bearer ${apiKey}` },
  });
  if (!response.ok) {
    const detail = await response.text();
    throw new Error(`OpenAI video content ${response.status}: ${detail}`);
  }

  const contentType = response.headers.get('content-type');
  const mediaType = (contentType ? contentType.split(';')[0] : undefined) || 'video/mp4';
  const bytes = await response.arrayBuffer();
  return { buffer: Buffer.from(bytes), type: mediaType };
}
/**
 * Starts a video generation with whichever provider is currently active.
 *
 * @param opts - Generation request, forwarded unchanged.
 * @returns The selected provider's normalized response.
 */
export async function generateVideo(opts: VideoGenerationRequest): Promise<VideoGenerationResponse> {
  if (activeVideoProvider() === 'seedance') {
    return generateSeedanceVideo(opts);
  }
  return generateOpenAIVideo(opts);
}
/**
 * Looks up a task with the provider that created it.
 *
 * @param taskId - Provider task id.
 * @param provider - 'seedance' routes to Seedance; any other value routes to OpenAI.
 * @param targetDuration - Forwarded to the OpenAI lookup only.
 * @returns The provider's normalized response.
 */
export async function getVideoTask(taskId: string, provider: VideoProvider, targetDuration?: number): Promise<VideoGenerationResponse> {
  if (provider === 'seedance') {
    return getSeedanceVideoTask(taskId);
  }
  return getOpenAIVideoTask(taskId, targetDuration);
}
/**
 * Fetches the current state of a Seedance generation task.
 *
 * @param taskId - Task id; URL-encoded into the request path.
 * @returns The normalized response; `taskId` falls back to the argument when
 *   the payload carries neither `task_id` nor `id`.
 * @throws Error when SEEDANCE_API_KEY is unset, taskId is empty, or the HTTP
 *   response is not OK.
 */
export async function getSeedanceVideoTask(taskId: string): Promise<VideoGenerationResponse> {
  const apiKey = process.env.SEEDANCE_API_KEY;
  if (!apiKey) throw new Error('SEEDANCE_API_KEY missing');
  if (!taskId) throw new Error('taskId required');

  const endpoint = `${SEEDANCE_API_BASE}/contents/generations/tasks/${encodeURIComponent(taskId)}`;
  const response = await fetch(endpoint, {
    headers: { Authorization: `Bearer ${apiKey}` },
  });
  if (!response.ok) {
    const detail = await response.text();
    throw new Error(`Seedance ${response.status}: ${detail}`);
  }

  const raw = await response.json() as {
    id?: string;
    task_id?: string;
    status?: string;
    video_url?: string;
    output?: { video_url?: string; url?: string };
    content?: { video_url?: string; url?: string };
  };
  const { output, content } = raw;

  return {
    provider: 'seedance',
    model: SEEDANCE_MODEL,
    taskId: raw.task_id || raw.id || taskId,
    status: normalizeStatus(raw.status),
    // The video URL may appear at the top level, under `output`, or under `content`.
    videoUrl: raw.video_url || output?.video_url || output?.url || content?.video_url || content?.url,
    raw,
  };
}