fix: align generation size and duration options

This commit is contained in:
2026-05-25 14:23:09 +08:00
parent fa64f95911
commit e77e77fada
5 changed files with 239 additions and 23 deletions

View File

@@ -124,6 +124,58 @@ IMAGE_FALLBACK_ENABLED = os.getenv("IMAGE_FALLBACK_ENABLED", "true").strip().low
# Each of the names below is bound to the same GPT_IMAGE_MODEL value, so
# changing GPT_IMAGE_MODEL changes all three at once.
IMAGE_MODEL = GPT_IMAGE_MODEL
PRODUCT_VIEW_MODEL = GPT_IMAGE_MODEL
SUBJECT_ASSET_IMAGE_MODEL = GPT_IMAGE_MODEL
# Selectable image output sizes. Every entry exposes the same string as both
# "id" and "value"; "label" and "description" are the display texts.
IMAGE_SIZE_CHOICES = [
    {"id": size, "label": label, "value": size, "description": description}
    for size, label, description in (
        ("auto", "自动", "由图片模型自行决定输出尺寸"),
        ("1024x1536", "竖图 2:3", "适合信息流营销图、人物和产品竖版构图"),
        ("1024x1024", "方图 1:1", "适合头像、方形素材和电商图"),
        ("1536x1024", "横图 3:2", "适合横版封面和详情页配图"),
    )
]
# Selectable video frame sizes. Every entry exposes the same string as both
# "id" and "value"; "label" and "description" are the display texts.
VIDEO_SIZE_CHOICES = [
    {"id": size, "label": label, "value": size, "description": description}
    for size, label, description in (
        ("720x1280", "竖屏 9:16", "适合抖音、短视频和飞书内预览"),
        ("1280x720", "横屏 16:9", "适合横版展示和网页视频"),
        ("1024x1024", "方形 1:1", "适合方形广告位"),
        ("960x1280", "竖屏 3:4", "适合更接近图文卡片的竖版素材"),
    )
]
# Closed sets of accepted string values, expressed as typing Literals.
SubjectModelBundle = Literal["gpt", "gemini"]
SubjectAgentMode = Literal["realistic", "cartoon", "elements", "custom"]
# Resolved through gpt_model_env with VISION_MODEL as the second argument
# (presumably the fallback when the variable is unset — confirm against
# gpt_model_env).
SUBJECT_AGENT_GPT_MODEL = gpt_model_env("SUBJECT_AGENT_GPT_MODEL", VISION_MODEL)
@@ -4134,6 +4186,67 @@ def image_model_options() -> list[dict]:
return options
def image_size_options() -> list[dict]:
    """Return the selectable image sizes.

    Each entry is a fresh shallow copy of an ``IMAGE_SIZE_CHOICES`` item (all
    values are strings, so the copy is fully independent). A caller that edits
    the returned list or its dicts therefore cannot corrupt the module-level
    table that ``_normalize_image_size`` validates against.

    Returns:
        A new list of dicts with the keys ``id``, ``label``, ``value`` and
        ``description``.
    """
    return [dict(choice) for choice in IMAGE_SIZE_CHOICES]
def _normalize_image_size(raw: str | None) -> str:
    """Resolve a user-supplied image size to one of the supported values.

    Matching ignores case and whitespace. Orientation words (English and
    Chinese) and the aspect ratios shown in the choice labels are accepted as
    aliases for the concrete ``WIDTHxHEIGHT`` values.

    Args:
        raw: Requested size, an alias for it, or ``None``. ``None``, an empty
            string and a whitespace-only string all mean ``"auto"``.

    Returns:
        A ``value`` listed in ``IMAGE_SIZE_CHOICES``.

    Raises:
        HTTPException: Constructed with 400 when the input does not resolve to
            a supported size.
    """
    # Clean up before applying the default so that "   " behaves like a
    # missing value instead of being rejected as an unknown size.
    cleaned = (raw or "").strip().lower().replace(" ", "")
    value = cleaned or "auto"
    aliases = {
        "vertical": "1024x1536",
        "portrait": "1024x1536",
        "2:3": "1024x1536",
        "竖图": "1024x1536",
        "square": "1024x1024",
        "1:1": "1024x1024",
        "方图": "1024x1024",
        "horizontal": "1536x1024",
        "landscape": "1536x1024",
        "3:2": "1536x1024",
        "横图": "1536x1024",
    }
    value = aliases.get(value, value)
    allowed = {str(item["value"]) for item in IMAGE_SIZE_CHOICES}
    if value not in allowed:
        # Echo the caller's original text, not the normalized form.
        raise HTTPException(400, f"unsupported image size: {raw}")
    return value
def _image_size_payload(raw: str | None) -> dict:
    """Build the optional ``size`` fragment of an image request payload.

    The input is validated through ``_normalize_image_size``. When it resolves
    to ``"auto"`` the fragment is empty, so no ``size`` key is sent at all;
    otherwise the fragment carries the normalized size.
    """
    normalized = _normalize_image_size(raw)
    if normalized == "auto":
        return {}
    return {"size": normalized}
def video_duration_options() -> list[int]:
    """Return the clip durations selectable for the active video backend.

    The list depends on ``video_uses_ark()``: one set of durations when it is
    truthy, a shorter set otherwise. A new list object is built on every call.
    """
    ark_durations = [5, 8, 10, 12, 15]
    default_durations = [4, 8, 12]
    return ark_durations if video_uses_ark() else default_durations
def video_size_options() -> list[dict]:
    """Return the selectable video sizes.

    Each entry is a fresh shallow copy of a ``VIDEO_SIZE_CHOICES`` item (all
    values are strings, so the copy is fully independent). A caller that edits
    the returned list or its dicts therefore cannot corrupt the module-level
    table that ``_normalize_video_size`` validates against.

    Returns:
        A new list of dicts with the keys ``id``, ``label``, ``value`` and
        ``description``.
    """
    return [dict(choice) for choice in VIDEO_SIZE_CHOICES]
def _normalize_video_size(raw: str | None) -> str:
    """Resolve a user-supplied video size to one of the supported values.

    Matching ignores case and whitespace. Orientation words (English and
    Chinese) and aspect ratios are accepted as aliases for the concrete
    ``WIDTHxHEIGHT`` values.

    Args:
        raw: Requested size, an alias for it, or ``None``. ``None``, an empty
            string and a whitespace-only string all mean ``"720x1280"``.

    Returns:
        A ``value`` listed in ``VIDEO_SIZE_CHOICES``.

    Raises:
        HTTPException: Constructed with 400 when the input does not resolve to
            a supported size.
    """
    # Clean up before applying the default so that "   " behaves like a
    # missing value instead of being rejected as an unknown size.
    cleaned = (raw or "").strip().lower().replace(" ", "")
    value = cleaned or "720x1280"
    aliases = {
        "vertical": "720x1280",
        "portrait": "720x1280",
        "9:16": "720x1280",
        "竖屏": "720x1280",
        "horizontal": "1280x720",
        "landscape": "1280x720",
        "16:9": "1280x720",
        "横屏": "1280x720",
        "square": "1024x1024",
        "1:1": "1024x1024",
        "方形": "1024x1024",
        "3:4": "960x1280",
    }
    value = aliases.get(value, value)
    allowed = {str(item["value"]) for item in VIDEO_SIZE_CHOICES}
    if value not in allowed:
        # Echo the caller's original text, not the normalized form.
        raise HTTPException(400, f"unsupported video size: {raw}")
    return value
def video_model_options() -> list[dict]:
label_map = {
"seedance": "Seedance",
@@ -4156,7 +4269,10 @@ def video_model_options() -> list[dict]:
"id": key,
"label": label_map.get(key, key),
"model": model,
"description": "当前视频网关可选模型",
"description": f"当前视频网关可选模型;单次时长最高 {max(video_duration_options())}",
"duration_options": video_duration_options(),
"size_options": video_size_options(),
"max_duration_seconds": max(video_duration_options()),
"available": bool(video_api_key()),
})
default_model = resolve_video_model(VIDEO_MODEL)
@@ -4166,6 +4282,9 @@ def video_model_options() -> list[dict]:
"label": label_map.get(VIDEO_MODEL, VIDEO_MODEL),
"model": default_model,
"description": "默认视频模型",
"duration_options": video_duration_options(),
"size_options": video_size_options(),
"max_duration_seconds": max(video_duration_options()),
"available": bool(video_api_key()),
})
return options
@@ -4252,12 +4371,12 @@ def _image_endpoint(path: str) -> str:
return f"{base}/{path.lstrip('/')}"
def _image_generation_response(prompt: str, model: str, size: str | None = "auto") -> dict:
    """POST a text-only image generation request and return the decoded JSON.

    Args:
        prompt: Prompt text sent as the ``prompt`` field.
        model: Model identifier sent as the ``model`` field.
        size: Requested output size. It is validated by
            ``_image_size_payload``; when it resolves to ``"auto"`` no ``size``
            field is sent.

    Returns:
        The response body parsed with ``r.json()``.

    Raises:
        Whatever ``_image_size_payload`` raises for an unsupported size, and
        whatever ``raise_for_status()`` raises for an error response.
    """
    # Build and validate the payload first so a bad size fails before any
    # HTTP client is opened.
    payload = {"model": model, "prompt": prompt, "n": 1, **_image_size_payload(size)}
    with ai_http_client(timeout=IMAGE_REQUEST_TIMEOUT_SECONDS) as client:
        r = client.post(
            _image_endpoint("/images/generations"),
            headers={"Authorization": f"Bearer {IMAGE_API_KEY}"},
            json=payload,
        )
        r.raise_for_status()
        return r.json()
@@ -5198,6 +5317,7 @@ def health() -> dict:
"image_base_url": IMAGE_BASE_URL or LLM_BASE_URL or "openai-default",
"image_request_timeout_seconds": IMAGE_REQUEST_TIMEOUT_SECONDS,
"image_options": image_model_options(),
"image_size_options": image_size_options(),
"ai_proxy_configured": bool(AI_HTTP_PROXY),
"image_fallbacks": _image_fallback_models(),
"image_circuit": _image_circuit_snapshot(),
@@ -5213,6 +5333,9 @@ def health() -> dict:
"video": VIDEO_MODEL,
"video_aliases": VIDEO_MODEL_ALIASES,
"video_options": video_model_options(),
"video_duration_options": video_duration_options(),
"video_max_duration_seconds": max(video_duration_options()),
"video_size_options": video_size_options(),
"video_provider": video_provider_name(),
"video_base_url": video_api_base(),
"video_configured": bool(video_api_key()),
@@ -5666,6 +5789,7 @@ class GenerateReq(BaseModel):
extra_prompt: str = "" # ✓ 需要的元素(正向)
negative_prompt: str = "" # ✗ 不需要的元素(负向)
model: str = "auto" # auto / gpt-image-2 / gemini-3-pro-image-preview
size: str = "auto" # auto / 1024x1536 / 1024x1024 / 1536x1024
mode: str = "edit" # "edit" 带参考图,"text" 纯文字
from_selected: bool = False # True 时优先用 frame.selected 的生成图作 reference迭代否则原关键帧
@@ -5702,6 +5826,7 @@ def generate_image(job_id: str, idx: int, req: GenerateReq) -> Job:
if not raw_prompt:
raise HTTPException(400, "prompt required")
full_prompt = _ensure_english(raw_prompt)
image_size = _normalize_image_size(req.size)
if not IMAGE_API_KEY:
raise HTTPException(503, "IMAGE_API_KEY 或 LLM_API_KEY 未配置")
@@ -5742,14 +5867,14 @@ def generate_image(job_id: str, idx: int, req: GenerateReq) -> Job:
headers={
"Authorization": f"Bearer {IMAGE_API_KEY}",
},
data={"model": current_model, "prompt": full_prompt, "n": "1"},
data={"model": current_model, "prompt": full_prompt, "n": "1", **_image_size_payload(image_size)},
files={"image": ("reference.jpg", img_bytes_in, "image/jpeg")},
)
r.raise_for_status()
resp_data = r.json()
else:
# text-only
resp_data = _image_generation_response(full_prompt, current_model)
resp_data = _image_generation_response(full_prompt, current_model, image_size)
if resp_data.get("data"):
effective_mode = f"{current_mode}:{current_model}"
@@ -7870,6 +7995,7 @@ def _enqueue_storyboard_videos(job: Job, frame: KeyFrame, req: GenerateStoryboar
model = resolve_video_model(req.model)
seconds = video_seconds(float(req.duration or 4))
video_size = _normalize_video_size(req.size)
source_ref = req.source_ref
if source_ref and source_ref.kind == "source_video" and not source_ref.url:
source_ref = None
@@ -7894,7 +8020,7 @@ def _enqueue_storyboard_videos(job: Job, frame: KeyFrame, req: GenerateStoryboar
progress=0,
created_at=time.time(),
))
task_args = (job.id, local_id, "", ref_path, variant_prompt, model, seconds, req.size, source_ref, last_ref_path, reference_ref_paths, primary_role)
task_args = (job.id, local_id, "", ref_path, variant_prompt, model, seconds, video_size, source_ref, last_ref_path, reference_ref_paths, primary_role)
if bg is not None:
bg.add_task(render_storyboard_video, *task_args)
else: