feat: expose image quality and pixel sizes

2026-05-26 12:22:21 +08:00
parent 47300b8fa6
commit 5d047af346
14 changed files with 461 additions and 77 deletions
--- a/api/main.py
+++ b/api/main.py
@@ -132,26 +132,139 @@ IMAGE_SIZE_CHOICES = [
        "id": "auto",
        "label": "自动",
        "value": "auto",
-        "description": "由图片模型自行决定输出尺寸",
+        "ratio": "auto",
+        "width": 0,
+        "height": 0,
+        "description": "由图片模型自行决定输出尺寸，生成后显示实际像素",
    },
    {
        "id": "1024x1536",
-        "label": "竖图 2:3",
+        "label": "竖图 2:3 · 1024×1536",
        "value": "1024x1536",
+        "ratio": "2:3",
+        "width": 1024,
+        "height": 1536,
        "description": "适合信息流营销图、人物和产品竖版构图",
    },
+    {
+        "id": "1536x2304",
+        "label": "竖图 2:3 · 1536×2304",
+        "value": "1536x2304",
+        "ratio": "2:3",
+        "width": 1536,
+        "height": 2304,
+        "description": "适合高精细竖版海报和后期裁切",
+    },
+    {
+        "id": "1088x1920",
+        "label": "竖屏 9:16 · 1088×1920",
+        "value": "1088x1920",
+        "ratio": "9:16",
+        "width": 1088,
+        "height": 1920,
+        "description": "接近 1080p 竖屏，宽度按 16 像素倍数提交",
+    },
+    {
+        "id": "1440x2560",
+        "label": "竖屏 9:16 · 1440×2560",
+        "value": "1440x2560",
+        "ratio": "9:16",
+        "width": 1440,
+        "height": 2560,
+        "description": "适合短视频封面、竖屏高清素材和二次裁切",
+    },
+    {
+        "id": "960x1280",
+        "label": "竖图 3:4 · 960×1280",
+        "value": "960x1280",
+        "ratio": "3:4",
+        "width": 960,
+        "height": 1280,
+        "description": "适合偏人物或产品竖图，文件体积较轻",
+    },
+    {
+        "id": "1536x1920",
+        "label": "竖图 4:5 · 1536×1920",
+        "value": "1536x1920",
+        "ratio": "4:5",
+        "width": 1536,
+        "height": 1920,
+        "description": "适合小红书、社媒封面和产品展示图",
+    },
    {
        "id": "1024x1024",
-        "label": "方图 1:1",
+        "label": "方图 1:1 · 1024×1024",
        "value": "1024x1024",
+        "ratio": "1:1",
+        "width": 1024,
+        "height": 1024,
        "description": "适合头像、方形素材和电商图",
    },
+    {
+        "id": "2048x2048",
+        "label": "方图 1:1 · 2048×2048",
+        "value": "2048x2048",
+        "ratio": "1:1",
+        "width": 2048,
+        "height": 2048,
+        "description": "适合高清方形素材和后期抠图",
+    },
    {
        "id": "1536x1024",
-        "label": "横图 3:2",
+        "label": "横图 3:2 · 1536×1024",
        "value": "1536x1024",
+        "ratio": "3:2",
+        "width": 1536,
+        "height": 1024,
        "description": "适合横版封面和详情页配图",
    },
+    {
+        "id": "2304x1536",
+        "label": "横图 3:2 · 2304×1536",
+        "value": "2304x1536",
+        "ratio": "3:2",
+        "width": 2304,
+        "height": 1536,
+        "description": "适合高清横版主视觉和详情页大图",
+    },
+    {
+        "id": "1280x720",
+        "label": "横屏 16:9 · 1280×720",
+        "value": "1280x720",
+        "ratio": "16:9",
+        "width": 1280,
+        "height": 720,
+        "description": "适合轻量横版封面、网页首屏和视频首帧",
+    },
+    {
+        "id": "2048x1152",
+        "label": "横屏 16:9 · 2048×1152",
+        "value": "2048x1152",
+        "ratio": "16:9",
+        "width": 2048,
+        "height": 1152,
+        "description": "适合高清横版视频封面和大屏展示",
+    },
+]
+IMAGE_QUALITY_CHOICES = [  # quality presets; each "value" is a quality string accepted by _normalize_image_quality
+    {
+        "id": "low",
+        "label": "低 · 快速草稿",
+        "value": "low",
+        "description": "更快生成，适合批量试方向",
+    },
+    {
+        "id": "medium",
+        "label": "中 · 常规出图",
+        "value": "medium",
+        "description": "速度和质量折中，适合日常迭代",
+    },
+    {
+        "id": "high",
+        "label": "高 · 最终稿",
+        "value": "high",
+        "description": "质量优先，适合定稿和高清素材",
+    },
 ]
 VIDEO_SIZE_CHOICES = [
    {
@@ -454,6 +567,10 @@ class GeneratedImage(BaseModel):
    model: str
    mode: str = "edit"  # "edit"(带参考图) | "text"(纯文字)
    url: str  # /jobs/{job_id}/frames/{idx}/gen/{id}.jpg
+    size: str = ""
+    quality: str = ""
+    width: int = 0
+    height: int = 0
    selected: bool = False
    created_at: float = 0.0

@@ -4591,23 +4708,69 @@ def image_size_options() -> list[dict]:
    return IMAGE_SIZE_CHOICES


+def image_quality_options() -> list[dict]:  # returns the module-level preset list itself (not a copy)
+    return IMAGE_QUALITY_CHOICES
+
+
+def _parse_image_dimensions(value: str) -> tuple[int, int] | None:
+    # Parse "WxH" (or "W×H"; case-insensitive, spaces allowed around the separator) into a (width, height) pair.
+    candidate = value.strip().lower().replace("×", "x")
+    found = re.fullmatch(r"(\d{3,4})\s*x\s*(\d{3,4})", candidate)
+    # Each edge must be written with 3-4 digits; any other shape yields None.
+    return (int(found.group(1)), int(found.group(2))) if found else None
+
+
+def _validate_custom_image_size(width: int, height: int, raw: str) -> str:  # returns canonical "WxH"; raises HTTPException(400) naming the violated limit (raw is echoed in the message)
+    pixels = width * height
+    long_edge = max(width, height)
+    short_edge = min(width, height)
+    if width % 16 != 0 or height % 16 != 0:  # both edges must be multiples of 16
+        raise HTTPException(400, f"unsupported image size: {raw} (宽高必须都是 16 的倍数，例如 1088x1920)")
+    if long_edge > 3840:
+        raise HTTPException(400, f"unsupported image size: {raw} (最长边不能超过 3840px)")
+    if long_edge > 3 * short_edge:  # integer form of long/short > 3: an edge parsed as 0 (e.g. "000x1024") is rejected with 400 instead of raising ZeroDivisionError
+        raise HTTPException(400, f"unsupported image size: {raw} (画幅比例不能超过 3:1)")
+    if pixels < 655_360 or pixels > 8_294_400:  # total pixel budget; also rejects "000x000"
+        raise HTTPException(400, f"unsupported image size: {raw} (总像素需在 655360 到 8294400 之间)")
+    return f"{width}x{height}"
+
+
 def _normalize_image_size(raw: str | None) -> str:
    value = (raw or "auto").strip().lower()
    aliases = {
+        "9:16": "1088x1920",  # ratio aliases ("a:b" / "axb") map to the smallest preset listed for that ratio
+        "9x16": "1088x1920",
+        "16:9": "1280x720",
+        "16x9": "1280x720",
+        "1:1": "1024x1024",
+        "1x1": "1024x1024",
+        "2:3": "1024x1536",
+        "2x3": "1024x1536",
+        "3:2": "1536x1024",
+        "3x2": "1536x1024",
+        "3:4": "960x1280",
+        "3x4": "960x1280",
+        "4:5": "1536x1920",
+        "4x5": "1536x1920",
        "vertical": "1024x1536",
        "portrait": "1024x1536",
        "竖图": "1024x1536",
+        "竖屏": "1088x1920",
        "square": "1024x1024",
        "方图": "1024x1024",
        "horizontal": "1536x1024",
        "landscape": "1536x1024",
        "横图": "1536x1024",
+        "横屏": "1280x720",
    }
    value = aliases.get(value, value)
    allowed = {str(item["value"]) for item in IMAGE_SIZE_CHOICES}
-    if value not in allowed:
-        raise HTTPException(400, f"unsupported image size: {raw}")
-    return value
+    if value in allowed:  # presets (including "auto") are returned without custom-size validation
+        return value
+    dimensions = _parse_image_dimensions(value)  # otherwise try to read the input as a custom WxH
+    if dimensions:
+        return _validate_custom_image_size(dimensions[0], dimensions[1], raw or value)  # raises HTTPException(400) when a limit is violated
+    raise HTTPException(400, f"unsupported image size: {raw}")  # neither a preset, an alias, nor a parsable WxH


 def _image_size_payload(raw: str | None) -> dict:
@@ -4615,6 +4778,39 @@ def _image_size_payload(raw: str | None) -> dict:
    return {} if size == "auto" else {"size": size}


+def _normalize_image_quality(raw: str | None) -> str:
+    # Map a requested quality (or one of its synonyms) onto an IMAGE_QUALITY_CHOICES value; None/empty means "high".
+    requested = (raw or "high").strip().lower()
+    synonyms = {
+        "standard": "high",
+        "hd": "high",
+        "best": "high",
+        "高": "high",
+        "high-quality": "high",
+        "normal": "medium",
+        "regular": "medium",
+        "中": "medium",
+        "medium-quality": "medium",
+        "draft": "low",
+        "fast": "low",
+        "低": "low",
+        "low-quality": "low",
+    }
+    canonical = synonyms.get(requested, requested)
+    if canonical in {str(item["value"]) for item in IMAGE_QUALITY_CHOICES}:
+        return canonical
+    raise HTTPException(400, f"unsupported image quality: {raw}")
+
+
+def _image_quality_payload(raw: str | None, model: str | None) -> dict:  # {"quality": ...} request fragment, or {} unless model == GPT_IMAGE_MODEL
+    quality = _normalize_image_quality(raw)  # normalized before the model check, so an invalid quality raises for every model
+    return {"quality": quality} if model == GPT_IMAGE_MODEL else {}
+
+
+def _image_options_payload(size: str | None, quality: str | None, model: str | None) -> dict:  # merged size + quality request fields for one image call
+    return {**_image_size_payload(size), **_image_quality_payload(quality, model)}
+
+
 def video_duration_options() -> list[int]:
    if video_uses_ark():
        return [5, 8, 10, 12, 15]
@@ -4774,12 +4970,12 @@ def _image_endpoint(path: str) -> str:
    return f"{base}/{path.lstrip('/')}"


-def _image_generation_response(prompt: str, model: str, size: str | None = "auto") -> dict:
+def _image_generation_response(prompt: str, model: str, size: str | None = "auto", quality: str | None = "high") -> dict:  # POSTs to /images/generations and returns the parsed JSON body; HTTP errors propagate via raise_for_status()
    with ai_http_client(timeout=IMAGE_REQUEST_TIMEOUT_SECONDS) as client:
        r = client.post(
            _image_endpoint("/images/generations"),
            headers={"Authorization": f"Bearer {IMAGE_API_KEY}"},
-            json={"model": model, "prompt": prompt, "n": 1, **_image_size_payload(size)},
+            json={"model": model, "prompt": prompt, "n": 1, **_image_options_payload(size, quality, model)},  # size is omitted for "auto"; quality is sent only for GPT_IMAGE_MODEL
        )
        r.raise_for_status()
        return r.json()
@@ -6108,6 +6304,7 @@ def health() -> dict:
            "image_request_timeout_seconds": IMAGE_REQUEST_TIMEOUT_SECONDS,
            "image_options": image_model_options(),
            "image_size_options": image_size_options(),
+            "image_quality_options": image_quality_options(),
            "ai_proxy_configured": bool(AI_HTTP_PROXY),
            "image_fallbacks": _image_fallback_models(),
            "image_circuit": _image_circuit_snapshot(),
@@ -6596,7 +6793,8 @@ class GenerateReq(BaseModel):
    extra_prompt: str = ""        # ✓ 需要的元素（正向）
    negative_prompt: str = ""     # ✗ 不需要的元素（负向）
    model: str = "auto"  # auto / gpt-image-2 / gemini-3-pro-image-preview
-    size: str = "auto"  # auto / 1024x1536 / 1024x1024 / 1536x1024
+    size: str = "auto"  # auto / 1024x1536 / 1088x1920 / custom WxH
+    quality: str = "high"  # low / medium / high
    mode: str = "edit"  # "edit" 带参考图，"text" 纯文字
    from_selected: bool = False   # True 时优先用 frame.selected 的生成图作 reference（迭代），否则原关键帧

@@ -6634,6 +6832,7 @@ def generate_image(job_id: str, idx: int, req: GenerateReq) -> Job:
        raise HTTPException(400, "prompt required")
    full_prompt = _ensure_english(raw_prompt)
    image_size = _normalize_image_size(req.size)
+    image_quality = _normalize_image_quality(req.quality)
    if not IMAGE_API_KEY:
        raise HTTPException(503, "IMAGE_API_KEY 或 LLM_API_KEY 未配置")

@@ -6674,14 +6873,14 @@ def generate_image(job_id: str, idx: int, req: GenerateReq) -> Job:
                        headers={
                            "Authorization": f"Bearer {IMAGE_API_KEY}",
                        },
-                        data={"model": current_model, "prompt": full_prompt, "n": "1", **_image_size_payload(image_size)},
+                        data={"model": current_model, "prompt": full_prompt, "n": "1", **_image_options_payload(image_size, image_quality, current_model)},
                        files={"image": ("reference.jpg", img_bytes_in, "image/jpeg")},
                    )
                    r.raise_for_status()
                    resp_data = r.json()
            else:
                # text-only
-                resp_data = _image_generation_response(full_prompt, current_model, image_size)
+                resp_data = _image_generation_response(full_prompt, current_model, image_size, image_quality)

            if resp_data.get("data"):
                effective_mode = f"{current_mode}:{current_model}"
@@ -6746,6 +6945,13 @@ def generate_image(job_id: str, idx: int, req: GenerateReq) -> Job:
    gen_dir.mkdir(parents=True, exist_ok=True)
    out_path = gen_dir / f"{idx:03d}_{gen_id}.jpg"
    out_path.write_bytes(out_bytes)
+    actual_width = 0
+    actual_height = 0
+    try:
+        with Image.open(io.BytesIO(out_bytes)) as im:
+            actual_width, actual_height = im.size
+    except Exception:
+        pass

    new_gen = GeneratedImage(
        id=gen_id,
@@ -6753,6 +6959,10 @@ def generate_image(job_id: str, idx: int, req: GenerateReq) -> Job:
        model=model,
        mode=effective_mode,
        url=f"/jobs/{job_id}/frames/{idx}/gen/{gen_id}.jpg",
+        size=image_size,
+        quality=image_quality,
+        width=actual_width,
+        height=actual_height,
        selected=False,
        created_at=_time.time(),
    )