fix: align generation size and duration options
This commit is contained in:
138
api/main.py
138
api/main.py
@@ -124,6 +124,58 @@ IMAGE_FALLBACK_ENABLED = os.getenv("IMAGE_FALLBACK_ENABLED", "true").strip().low
|
||||
IMAGE_MODEL = GPT_IMAGE_MODEL
|
||||
PRODUCT_VIEW_MODEL = GPT_IMAGE_MODEL
|
||||
SUBJECT_ASSET_IMAGE_MODEL = GPT_IMAGE_MODEL
|
||||
# Catalogue of image output sizes. Every entry carries the same string as
# both "id" and "value"; "label" and "description" are the display strings
# (kept in Chinese because they are emitted at runtime).
# `_normalize_image_size` derives its allow-list from the "value" fields.
IMAGE_SIZE_CHOICES = [
    {"id": size, "label": label, "value": size, "description": description}
    for size, label, description in (
        # "auto" is a sentinel rather than a pixel size.
        ("auto", "自动", "由图片模型自行决定输出尺寸"),
        ("1024x1536", "竖图 2:3", "适合信息流营销图、人物和产品竖版构图"),
        ("1024x1024", "方图 1:1", "适合头像、方形素材和电商图"),
        ("1536x1024", "横图 3:2", "适合横版封面和详情页配图"),
    )
]
|
||||
# Catalogue of video output sizes, written as "<width>x<height>". Every
# entry carries the same string as both "id" and "value"; "label" and
# "description" are the display strings (kept in Chinese because they are
# emitted at runtime).
# `_normalize_video_size` derives its allow-list from the "value" fields and
# falls back to "720x1280" when no size is supplied.
VIDEO_SIZE_CHOICES = [
    {"id": size, "label": label, "value": size, "description": description}
    for size, label, description in (
        ("720x1280", "竖屏 9:16", "适合抖音、短视频和飞书内预览"),
        ("1280x720", "横屏 16:9", "适合横版展示和网页视频"),
        ("1024x1024", "方形 1:1", "适合方形广告位"),
        ("960x1280", "竖屏 3:4", "适合更接近图文卡片的竖版素材"),
    )
]
|
||||
SubjectModelBundle = Literal["gpt", "gemini"]
|
||||
SubjectAgentMode = Literal["realistic", "cartoon", "elements", "custom"]
|
||||
SUBJECT_AGENT_GPT_MODEL = gpt_model_env("SUBJECT_AGENT_GPT_MODEL", VISION_MODEL)
|
||||
@@ -4134,6 +4186,67 @@ def image_model_options() -> list[dict]:
|
||||
return options
|
||||
|
||||
|
||||
def image_size_options() -> list[dict]:
    """Return the selectable image sizes for API responses.

    Returns:
        A new list holding a shallow copy of every entry in
        ``IMAGE_SIZE_CHOICES`` (keys ``id``, ``label``, ``value``,
        ``description``).
    """
    # Hand out copies, not the module-level list: the same catalogue is the
    # allow-list used when validating requested sizes, so a caller mutating
    # the response payload must not be able to alter it.
    return [dict(choice) for choice in IMAGE_SIZE_CHOICES]
|
||||
|
||||
|
||||
def _normalize_image_size(raw: str | None) -> str:
    """Resolve a requested image size to a supported ``value``.

    Matching ignores case and whitespace. Orientation words (English and
    Chinese) and the aspect ratios shown in the size labels are accepted as
    aliases for the concrete pixel sizes.

    Args:
        raw: Size as supplied by the caller. ``None``, an empty string, or a
            whitespace-only string all mean "no preference" and resolve to
            ``"auto"``.

    Returns:
        One of the ``value`` strings listed in ``IMAGE_SIZE_CHOICES``.

    Raises:
        HTTPException: Status 400 when the size is not supported.
    """
    # Apply the default *after* stripping so that "   " behaves like "" and
    # None instead of being rejected. Removing every whitespace run (not
    # only the outer ones) lets inputs such as "1024 x 1536" match.
    value = "".join((raw or "").lower().split()) or "auto"
    aliases = {
        "vertical": "1024x1536",
        "portrait": "1024x1536",
        "2:3": "1024x1536",
        "竖图": "1024x1536",
        "square": "1024x1024",
        "1:1": "1024x1024",
        "方图": "1024x1024",
        "horizontal": "1536x1024",
        "landscape": "1536x1024",
        "3:2": "1536x1024",
        "横图": "1536x1024",
    }
    value = aliases.get(value, value)
    # The catalogue is the single source of truth for what is accepted.
    allowed = {str(item["value"]) for item in IMAGE_SIZE_CHOICES}
    if value not in allowed:
        # Echo the caller's original text, not the normalized token.
        raise HTTPException(400, f"unsupported image size: {raw}")
    return value
|
||||
|
||||
|
||||
def _image_size_payload(raw: str | None) -> dict:
    """Build the optional ``size`` field for an image request body.

    Args:
        raw: Requested size; validated by ``_normalize_image_size``, which
            raises for unsupported values.

    Returns:
        An empty dict when the size resolves to ``"auto"`` (so the field is
        left out of the request), otherwise ``{"size": <resolved size>}``.
    """
    normalized = _normalize_image_size(raw)
    if normalized == "auto":
        return {}
    return {"size": normalized}
|
||||
|
||||
|
||||
def video_duration_options() -> list[int]:
    """Return the selectable video durations, in seconds.

    Returns:
        ``[5, 8, 10, 12, 15]`` when ``video_uses_ark()`` is true, otherwise
        ``[4, 8, 12]``. A fresh list is created on every call.
    """
    return [5, 8, 10, 12, 15] if video_uses_ark() else [4, 8, 12]
|
||||
|
||||
|
||||
def video_size_options() -> list[dict]:
    """Return the selectable video sizes for API responses.

    Returns:
        A new list holding a shallow copy of every entry in
        ``VIDEO_SIZE_CHOICES`` (keys ``id``, ``label``, ``value``,
        ``description``).
    """
    # Hand out copies, not the module-level list: the same catalogue is the
    # allow-list used when validating requested sizes, and this result is
    # embedded in several response objects, so it must stay immutable from
    # the callers' point of view.
    return [dict(choice) for choice in VIDEO_SIZE_CHOICES]
|
||||
|
||||
|
||||
def _normalize_video_size(raw: str | None) -> str:
    """Resolve a requested video size to a supported ``value``.

    Matching ignores case and whitespace. Orientation words (English and
    Chinese) and aspect ratios are accepted as aliases for the concrete
    pixel sizes.

    Args:
        raw: Size as supplied by the caller. ``None``, an empty string, or a
            whitespace-only string all resolve to the default ``"720x1280"``.

    Returns:
        One of the ``value`` strings listed in ``VIDEO_SIZE_CHOICES``.

    Raises:
        HTTPException: Status 400 when the size is not supported.
    """
    # Apply the default *after* stripping so that "   " behaves like "" and
    # None instead of being rejected. split()/join removes every kind of
    # whitespace, not just the ASCII space.
    value = "".join((raw or "").lower().split()) or "720x1280"
    aliases = {
        "vertical": "720x1280",
        "portrait": "720x1280",
        "9:16": "720x1280",
        "竖屏": "720x1280",
        "horizontal": "1280x720",
        "landscape": "1280x720",
        "16:9": "1280x720",
        "横屏": "1280x720",
        "square": "1024x1024",
        "1:1": "1024x1024",
        "方形": "1024x1024",
        "3:4": "960x1280",
    }
    value = aliases.get(value, value)
    # The catalogue is the single source of truth for what is accepted.
    allowed = {str(item["value"]) for item in VIDEO_SIZE_CHOICES}
    if value not in allowed:
        # Echo the caller's original text, not the normalized token.
        raise HTTPException(400, f"unsupported video size: {raw}")
    return value
|
||||
|
||||
|
||||
def video_model_options() -> list[dict]:
|
||||
label_map = {
|
||||
"seedance": "Seedance",
|
||||
@@ -4156,7 +4269,10 @@ def video_model_options() -> list[dict]:
|
||||
"id": key,
|
||||
"label": label_map.get(key, key),
|
||||
"model": model,
|
||||
"description": "当前视频网关可选模型",
|
||||
"description": f"当前视频网关可选模型;单次时长最高 {max(video_duration_options())} 秒",
|
||||
"duration_options": video_duration_options(),
|
||||
"size_options": video_size_options(),
|
||||
"max_duration_seconds": max(video_duration_options()),
|
||||
"available": bool(video_api_key()),
|
||||
})
|
||||
default_model = resolve_video_model(VIDEO_MODEL)
|
||||
@@ -4166,6 +4282,9 @@ def video_model_options() -> list[dict]:
|
||||
"label": label_map.get(VIDEO_MODEL, VIDEO_MODEL),
|
||||
"model": default_model,
|
||||
"description": "默认视频模型",
|
||||
"duration_options": video_duration_options(),
|
||||
"size_options": video_size_options(),
|
||||
"max_duration_seconds": max(video_duration_options()),
|
||||
"available": bool(video_api_key()),
|
||||
})
|
||||
return options
|
||||
@@ -4252,12 +4371,12 @@ def _image_endpoint(path: str) -> str:
|
||||
return f"{base}/{path.lstrip('/')}"
|
||||
|
||||
|
||||
def _image_generation_response(prompt: str, model: str) -> dict:
|
||||
def _image_generation_response(prompt: str, model: str, size: str | None = "auto") -> dict:
|
||||
with ai_http_client(timeout=IMAGE_REQUEST_TIMEOUT_SECONDS) as client:
|
||||
r = client.post(
|
||||
_image_endpoint("/images/generations"),
|
||||
headers={"Authorization": f"Bearer {IMAGE_API_KEY}"},
|
||||
json={"model": model, "prompt": prompt, "n": 1},
|
||||
json={"model": model, "prompt": prompt, "n": 1, **_image_size_payload(size)},
|
||||
)
|
||||
r.raise_for_status()
|
||||
return r.json()
|
||||
@@ -5198,6 +5317,7 @@ def health() -> dict:
|
||||
"image_base_url": IMAGE_BASE_URL or LLM_BASE_URL or "openai-default",
|
||||
"image_request_timeout_seconds": IMAGE_REQUEST_TIMEOUT_SECONDS,
|
||||
"image_options": image_model_options(),
|
||||
"image_size_options": image_size_options(),
|
||||
"ai_proxy_configured": bool(AI_HTTP_PROXY),
|
||||
"image_fallbacks": _image_fallback_models(),
|
||||
"image_circuit": _image_circuit_snapshot(),
|
||||
@@ -5213,6 +5333,9 @@ def health() -> dict:
|
||||
"video": VIDEO_MODEL,
|
||||
"video_aliases": VIDEO_MODEL_ALIASES,
|
||||
"video_options": video_model_options(),
|
||||
"video_duration_options": video_duration_options(),
|
||||
"video_max_duration_seconds": max(video_duration_options()),
|
||||
"video_size_options": video_size_options(),
|
||||
"video_provider": video_provider_name(),
|
||||
"video_base_url": video_api_base(),
|
||||
"video_configured": bool(video_api_key()),
|
||||
@@ -5666,6 +5789,7 @@ class GenerateReq(BaseModel):
|
||||
extra_prompt: str = "" # ✓ 需要的元素(正向)
|
||||
negative_prompt: str = "" # ✗ 不需要的元素(负向)
|
||||
model: str = "auto" # auto / gpt-image-2 / gemini-3-pro-image-preview
|
||||
size: str = "auto" # auto / 1024x1536 / 1024x1024 / 1536x1024
|
||||
mode: str = "edit" # "edit" 带参考图,"text" 纯文字
|
||||
from_selected: bool = False # True 时优先用 frame.selected 的生成图作 reference(迭代),否则原关键帧
|
||||
|
||||
@@ -5702,6 +5826,7 @@ def generate_image(job_id: str, idx: int, req: GenerateReq) -> Job:
|
||||
if not raw_prompt:
|
||||
raise HTTPException(400, "prompt required")
|
||||
full_prompt = _ensure_english(raw_prompt)
|
||||
image_size = _normalize_image_size(req.size)
|
||||
if not IMAGE_API_KEY:
|
||||
raise HTTPException(503, "IMAGE_API_KEY 或 LLM_API_KEY 未配置")
|
||||
|
||||
@@ -5742,14 +5867,14 @@ def generate_image(job_id: str, idx: int, req: GenerateReq) -> Job:
|
||||
headers={
|
||||
"Authorization": f"Bearer {IMAGE_API_KEY}",
|
||||
},
|
||||
data={"model": current_model, "prompt": full_prompt, "n": "1"},
|
||||
data={"model": current_model, "prompt": full_prompt, "n": "1", **_image_size_payload(image_size)},
|
||||
files={"image": ("reference.jpg", img_bytes_in, "image/jpeg")},
|
||||
)
|
||||
r.raise_for_status()
|
||||
resp_data = r.json()
|
||||
else:
|
||||
# text-only
|
||||
resp_data = _image_generation_response(full_prompt, current_model)
|
||||
resp_data = _image_generation_response(full_prompt, current_model, image_size)
|
||||
|
||||
if resp_data.get("data"):
|
||||
effective_mode = f"{current_mode}:{current_model}"
|
||||
@@ -7870,6 +7995,7 @@ def _enqueue_storyboard_videos(job: Job, frame: KeyFrame, req: GenerateStoryboar
|
||||
|
||||
model = resolve_video_model(req.model)
|
||||
seconds = video_seconds(float(req.duration or 4))
|
||||
video_size = _normalize_video_size(req.size)
|
||||
source_ref = req.source_ref
|
||||
if source_ref and source_ref.kind == "source_video" and not source_ref.url:
|
||||
source_ref = None
|
||||
@@ -7894,7 +8020,7 @@ def _enqueue_storyboard_videos(job: Job, frame: KeyFrame, req: GenerateStoryboar
|
||||
progress=0,
|
||||
created_at=time.time(),
|
||||
))
|
||||
task_args = (job.id, local_id, "", ref_path, variant_prompt, model, seconds, req.size, source_ref, last_ref_path, reference_ref_paths, primary_role)
|
||||
task_args = (job.id, local_id, "", ref_path, variant_prompt, model, seconds, video_size, source_ref, last_ref_path, reference_ref_paths, primary_role)
|
||||
if bg is not None:
|
||||
bg.add_task(render_storyboard_video, *task_args)
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user