fix: stabilize image gateway transport

2026-05-18 09:17:24 +08:00
parent 9afed8537d
commit cf648eaac2
5 changed files with 100 additions and 11 deletions
--- a/RULES.md
+++ b/RULES.md
@@ -62,6 +62,7 @@
 - `PRODUCT_VIEW_MODEL`：同一产品素材池的视角标注/自动识别模型；当前按项目要求强制使用 `gpt-image-2`
 - `IMAGE_BASE_URL` / `IMAGE_API_KEY` / `IMAGE_MODEL`：OpenAI 兼容生图网关；当前所有生图入口一律强制使用 `gpt-image-2`，不做其他图片模型 fallback
 - `GPT_IMAGE_MODEL` / `SUBJECT_ASSET_IMAGE_MODEL` / `SUBJECT_ASSET_IMAGE_MODELS`：保留兼容旧环境变量名，但服务端会强制主体 6 视图和所有其他生图入口都只使用 `gpt-image-2`
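+- `AI_HTTP_PROXY` / `IMAGE_HTTP_PROXY`：可选的 AI 网关出站代理；服务端按 `AI_HTTP_PROXY` → `IMAGE_HTTP_PROXY` → `HTTPS_PROXY` / `https_proxy` → `HTTP_PROXY` / `http_proxy` 的顺序取第一个已设置的值。本地 launchd 后台进程不一定继承 shell 的 `http_proxy/https_proxy`，如生图报 DNS / ConnectError，可在本地 `api/.env` 显式配置后重启后端。`/health` 只回传是否配置代理，不回传代理地址。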
 - `VOICE_PROVIDER`：配音通道，当前固定使用 `azure_openai`
 - `AZURE_OPENAI_BASE_URL` / `AZURE_OPENAI_API_KEY`：微软 Azure OpenAI 协议配音网关；本地未单独配置 Key 时回退复用 `LLM_API_KEY`
 - `AZURE_TTS_MODEL` / `AZURE_TTS_VOICE_ID` / `AZURE_TTS_VOICE_POOL` / `AZURE_TTS_PATH`：Azure OpenAI TTS 模型、默认音色、音色池和 OpenAI 协议语音路径
--- a/api/.env.example
+++ b/api/.env.example
@@ -25,6 +25,8 @@ IMAGE_MODEL=gpt-image-2
 GPT_IMAGE_MODEL=gpt-image-2
 SUBJECT_ASSET_IMAGE_MODEL=gpt-image-2
 SUBJECT_ASSET_IMAGE_MODELS=gpt-image-2
+# 可选：本地网络需要代理访问 ai.skg.com 时配置；launchd 不一定继承 shell 代理变量。留空时后端会依次回退到 IMAGE_HTTP_PROXY、HTTPS_PROXY / HTTP_PROXY。
+AI_HTTP_PROXY=
 VIDEO_MODEL=seedance
 VIDEO_MODEL_SEEDANCE=seedance-2-fast
 VIDEO_MODEL_KLING=kling-omni
--- a/api/main.py
+++ b/api/main.py
@@ -52,6 +52,15 @@ REWRITE_MODEL = os.getenv("REWRITE_MODEL", "gemini-2.5-pro")
 VISION_MODEL = os.getenv("VISION_MODEL", "gemini-2.5-flash")
 IMAGE_BASE_URL = os.getenv("IMAGE_BASE_URL", LLM_BASE_URL).strip()
 IMAGE_API_KEY = os.getenv("IMAGE_API_KEY", LLM_API_KEY).strip()
+AI_HTTP_PROXY = (
+    os.getenv("AI_HTTP_PROXY")
+    or os.getenv("IMAGE_HTTP_PROXY")
+    or os.getenv("HTTPS_PROXY")
+    or os.getenv("https_proxy")
+    or os.getenv("HTTP_PROXY")
+    or os.getenv("http_proxy")
+    or ""
+).strip()
 # Product decision: every image-generation/editing path is locked to gpt-image-2.
 # Environment variables may still choose the gateway URL/key, but not the model.
 GPT_IMAGE_MODEL = "gpt-image-2"
@@ -175,12 +184,33 @@ _MEDIA_BIN_CACHE: dict[str, str] = {}
 from openai import OpenAI
 _llm_client: OpenAI | None = None
 _image_client: OpenAI | None = None
+
+def ai_http_client(timeout: float = 120) -> httpx.Client:
+    """HTTP client for SKG AI gateway calls.
+
+    launchd does not reliably inherit interactive-shell proxy variables, so the
+    app also supports an explicit AI_HTTP_PROXY / IMAGE_HTTP_PROXY in api/.env.
+    """
+    kwargs: dict = {"timeout": timeout}
+    if AI_HTTP_PROXY:
+        kwargs["proxy"] = AI_HTTP_PROXY
+    return httpx.Client(**kwargs)
+
+
+def openai_http_client(timeout: float = 120) -> httpx.Client | None:
+    return ai_http_client(timeout=timeout) if AI_HTTP_PROXY else None
+
+
 def llm() -> OpenAI:
    global _llm_client
    if _llm_client is None:
        if not LLM_API_KEY:
            raise RuntimeError("LLM_API_KEY 未配置")
-        _llm_client = OpenAI(base_url=LLM_BASE_URL or None, api_key=LLM_API_KEY)
+        kwargs = {"base_url": LLM_BASE_URL or None, "api_key": LLM_API_KEY}
+        http_client = openai_http_client()
+        if http_client:
+            kwargs["http_client"] = http_client
+        _llm_client = OpenAI(**kwargs)
    return _llm_client

 def image_llm() -> OpenAI:
@@ -188,7 +218,11 @@ def image_llm() -> OpenAI:
    if _image_client is None:
        if not IMAGE_API_KEY:
            raise RuntimeError("IMAGE_API_KEY 或 LLM_API_KEY 未配置")
-        _image_client = OpenAI(base_url=IMAGE_BASE_URL or None, api_key=IMAGE_API_KEY)
+        kwargs = {"base_url": IMAGE_BASE_URL or None, "api_key": IMAGE_API_KEY}
+        http_client = openai_http_client()
+        if http_client:
+            kwargs["http_client"] = http_client
+        _image_client = OpenAI(**kwargs)
    return _image_client

 def product_view_llm() -> OpenAI:
@@ -2576,18 +2610,55 @@ def _image_retry_delay(attempt: int, status_code: int = 0, body: str = "", retry
    return [1.0, 2.0, 4.0, 8.0][min(attempt, 3)]


+def _image_is_transport_error(message: str) -> bool:
+    lower = message.lower()
+    return any(
+        token in lower
+        for token in (
+            "connecterror",
+            "connecttimeout",
+            "readtimeout",
+            "timeout",
+            "nodename nor servname",
+            "name or service not known",
+            "temporary failure in name resolution",
+            "operation not permitted",
+            "connection refused",
+            "network is unreachable",
+        )
+    )
+
+
 def _image_failure_message(kind: str, attempts: int, last_err: str, capacity_seen: bool) -> str:
    if capacity_seen:
        return (
            f"{kind} failed after {attempts} attempts: gpt-image-2 上游负载饱和，"
            f"已自动退避重试仍失败，请稍后点重试。最后错误：{last_err}"
        )
+    if _image_is_transport_error(last_err):
+        return (
+            f"{kind} failed after {attempts} attempts: 图片网关网络/DNS 连接失败，"
+            "请确认本机网络或在 api/.env 配置 AI_HTTP_PROXY / IMAGE_HTTP_PROXY 后重启后端。"
+            f"最后错误：{last_err}"
+        )
    return f"{kind} failed after {attempts} attempts: {last_err}"


 def _image_error_status(error: Exception) -> int:
    msg = str(error)
-    return 503 if ("上游负载饱和" in msg or "HTTP 429" in msg or "saturated" in msg.lower()) else 500
+    return 503 if (
+        "上游负载饱和" in msg
+        or "HTTP 429" in msg
+        or "saturated" in msg.lower()
+        or _image_is_transport_error(msg)
+    ) else 500
+
+
+def _image_endpoint(path: str) -> str:
+    base = (IMAGE_BASE_URL or "").strip().rstrip("/")
+    if not base:
+        raise RuntimeError("IMAGE_BASE_URL 或 LLM_BASE_URL 未配置")
+    return f"{base}/{path.lstrip('/')}"


 def _prepare_image_edit_bytes(image_path: Path, max_side: int) -> bytes:
@@ -2645,9 +2716,9 @@ def _image_edit_call(
        retry_after: str | None = None
        try:
            if current_mode == "edit":
-                with httpx.Client(timeout=120) as client:
+                with ai_http_client(timeout=120) as client:
                    r = client.post(
-                        f"{IMAGE_BASE_URL}/images/edits",
+                        _image_endpoint("/images/edits"),
                        headers={
                            "Authorization": f"Bearer {IMAGE_API_KEY}",
                        },
@@ -2696,7 +2767,7 @@ def _image_edit_call(
    item = data_arr[0]
    b64 = item.get("b64_json")
    if not b64 and item.get("url"):
-        with httpx.Client(timeout=120) as client:
+        with ai_http_client(timeout=120) as client:
            image_resp = client.get(item["url"])
            image_resp.raise_for_status()
            return image_resp.content, effective_mode
@@ -2943,6 +3014,7 @@ def health() -> dict:
            "product_view": PRODUCT_VIEW_MODEL,
            "image": IMAGE_MODEL,
            "image_base_url": IMAGE_BASE_URL or LLM_BASE_URL or "openai-default",
+            "ai_proxy_configured": bool(AI_HTTP_PROXY),
            "image_fallbacks": [GPT_IMAGE_MODEL],
            "subject_image": SUBJECT_ASSET_IMAGE_MODEL,
            "subject_image_fallbacks": SUBJECT_ASSET_IMAGE_MODELS,
@@ -3267,9 +3339,9 @@ def generate_image(job_id: str, idx: int, req: GenerateReq) -> Job:
            if current_mode == "edit":
                if img_bytes_in is None:
                    raise RuntimeError("edit mode reference image missing")
-                with httpx.Client(timeout=120) as client:
+                with ai_http_client(timeout=120) as client:
                    r = client.post(
-                        f"{IMAGE_BASE_URL}/images/edits",
+                        _image_endpoint("/images/edits"),
                        headers={
                            "Authorization": f"Bearer {IMAGE_API_KEY}",
                        },
@@ -3322,7 +3394,7 @@ def generate_image(job_id: str, idx: int, req: GenerateReq) -> Job:
    if b64:
        out_bytes = b64lib.b64decode(b64)
    elif item.get("url"):
-        with httpx.Client(timeout=120) as client:
+        with ai_http_client(timeout=120) as client:
            image_resp = client.get(item["url"])
            image_resp.raise_for_status()
            out_bytes = image_resp.content
--- a/deploy/.env.production.example
+++ b/deploy/.env.production.example
@@ -30,6 +30,8 @@ IMAGE_MODEL=gpt-image-2
 GPT_IMAGE_MODEL=gpt-image-2
 SUBJECT_ASSET_IMAGE_MODEL=gpt-image-2
 SUBJECT_ASSET_IMAGE_MODELS=gpt-image-2
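+# Optional outbound proxy for AI gateway calls. Leave blank on normal VPS networking; when blank, the backend falls back to IMAGE_HTTP_PROXY, then HTTPS_PROXY / HTTP_PROXY from the process environment.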
+AI_HTTP_PROXY=

 # Audio rewrite and Azure OpenAI TTS
 AUDIO_REWRITE_MODEL=gemini-2.5-pro
--- a/docs/source-analysis.html
+++ b/docs/source-analysis.html
@@ -889,13 +889,13 @@ ProductRefStateItem {
            <tr><td>应用清洗</td><td><code>POST /cleanup/apply</code></td><td><code>applyCleanedFrame</code></td><td>物理覆盖 frames/{idx}.jpg，并备份原图。</td></tr>
            <tr><td>元素增改删</td><td><code>POST/PATCH/DELETE /elements</code></td><td><code>addElement/updateElement/deleteElement</code></td><td>让用户修正 Vision 错误，避免候选结果锁死。</td></tr>
            <tr><td>元素提取</td><td><code>POST /elements/{element_id}/cutout</code></td><td><code>cutoutElement</code></td><td>调用图像模型生成独立白底素材图，每次累积一张 cutout。</td></tr>
-            <tr><td>主体资产包</td><td><code>POST /elements/{element_id}/subject-assets</code><br><code>DELETE /elements/{element_id}/subject-assets/{asset_id}</code></td><td><code>generateSubjectAssets</code><br><code>deleteSubjectAsset</code></td><td>根据参考帧和可选内置形象重新绘制一个统一主体资产包；前端默认把全部关键帧作为 <code>source_frame_indices</code>，如果用户手动选择了关键帧则只传已选帧，也可传 <code>character_id</code> 选择 5 套内置透明骨架形象之一。当前源视频工作区支持 <code>subject_style=transparent_human</code> 和 <code>source_actor</code> 两种相似主体：透明骨架人会保持透明/半透明皮肤包裹可见白色骨架，普通真人会保持正常广告演员方向。两种模式都使用 <code>reconstruction_mode=similar</code>，后端最多把 10 张参考图作为独立 <code>image[]</code> 提交给 <code>gpt-image-2</code>，生成默认 10 张 2048 高清白底图：正面、左右 45、左右侧、背面、肩颈正/左右近景、后颈肩背特写。Prompt 明确这是肩颈按摩设备视频素材，要求脖颈、锁骨、肩线、上背和肩胛区域清晰无遮挡；内置形象只作为创意方向，不照抄。后端强制使用 <code>gpt-image-2</code>，不再接受前端或环境变量切到其他图片模型，也不做图片模型 fallback；后端会加身份锁定约束，统一性别表现、年龄段、体型、材质、风格和视觉身份，避免整套图混成不同人物。如果参考帧是竖屏，prompt 会明确要求竖版 9:16 风格画布，落盘也按源帧纵横比归一化。前端白底视图缩略图和关键帧一样，鼠标停留会用顶层浮层放大预览，点击仍打开原图；后端每个 <code>view</code> 单独调用一次生图，并明确禁止多视图拼图、contact sheet、多主体、多面板、标签或对比排版，保证一个视角一张照片。<code>replace_views=true</code> 时会替换同一视角旧图；删除接口会移除对应 subject asset 记录并删除本地 jpg 文件。</td></tr>
+            <tr><td>主体资产包</td><td><code>POST /elements/{element_id}/subject-assets</code><br><code>DELETE /elements/{element_id}/subject-assets/{asset_id}</code></td><td><code>generateSubjectAssets</code><br><code>deleteSubjectAsset</code></td><td>根据参考帧和可选内置形象重新绘制一个统一主体资产包；前端默认把全部关键帧作为 <code>source_frame_indices</code>，如果用户手动选择了关键帧则只传已选帧，也可传 <code>character_id</code> 选择 5 套内置透明骨架形象之一。当前源视频工作区支持 <code>subject_style=transparent_human</code> 和 <code>source_actor</code> 两种相似主体：透明骨架人会保持透明/半透明皮肤包裹可见白色骨架，普通真人会保持正常广告演员方向。两种模式都使用 <code>reconstruction_mode=similar</code>，后端最多把 10 张参考图作为独立 <code>image[]</code> 提交给 <code>gpt-image-2</code>，生成默认 10 张 2048 高清白底图：正面、左右 45、左右侧、背面、肩颈正/左右近景、后颈肩背特写。Prompt 明确这是肩颈按摩设备视频素材，要求脖颈、锁骨、肩线、上背和肩胛区域清晰无遮挡；内置形象只作为创意方向，不照抄。后端强制使用 <code>gpt-image-2</code>，不再接受前端或环境变量切到其他图片模型，也不做图片模型 fallback；后端会加身份锁定约束，统一性别表现、年龄段、体型、材质、风格和视觉身份，避免整套图混成不同人物。如果参考帧是竖屏，prompt 会明确要求竖版 9:16 风格画布，落盘也按源帧纵横比归一化。前端白底视图缩略图和关键帧一样，鼠标停留会用顶层浮层放大预览，点击仍打开原图；后端每个 <code>view</code> 单独调用一次生图，并明确禁止多视图拼图、contact sheet、多主体、多面板、标签或对比排版，保证一个视角一张照片。图片调用走统一 <code>ai_http_client</code>，可用 <code>AI_HTTP_PROXY</code> / <code>IMAGE_HTTP_PROXY</code> 处理本地 launchd 代理继承问题；网络/DNS 失败返回 503 并提示检查代理配置。<code>replace_views=true</code> 时会替换同一视角旧图；删除接口会移除对应 subject asset 记录并删除本地 jpg 文件。</td></tr>
            <tr><td>首尾帧资产</td><td><code>POST /frames/{idx}/scene-asset</code></td><td><code>generateSceneAsset</code></td><td>同一接口兼容旧场景图和新首尾帧；新流程传 <code>asset_role=first_frame/last_frame</code>，后端走文字生图，参考帧只用于理解透明骨架人形象、比例、机位和光线，生成结果仍保存在 <code>scene_assets</code> 并自动填入产品融合镜头。</td></tr>
            <tr><td>产品图库</td><td><code>GET /product-library/skg</code></td><td><code>listProductLibrary</code></td><td>读取内置 SKG 白底图库 manifest，返回产品标题、品类、尺寸、白底评分和预览图 URL。</td></tr>
            <tr><td>产品图入库到 job</td><td><code>POST /jobs/{id}/assets</code>、<code>POST /jobs/{id}/assets/product-library</code></td><td><code>uploadStoryboardAsset</code>、<code>copyProductLibraryAsset</code></td><td>上传产品图或把内置产品图库条目复制为当前 job 的普通 asset。后端统一生成最长边 1600px、JPEG 92 的 AI 工作副本，透明底铺白，过大/过小图片会在 <code>ImageRef.asset_meta</code> 里返回转换动作和风险；黑底/白底背景本身不强行转换。注意该接口只写图片文件，产品素材池列表另由 <code>PUT /jobs/{id}/product-refs</code> 持久化。</td></tr>
            <tr><td>产品素材池保存</td><td><code>PUT /jobs/{id}/product-refs</code></td><td><code>saveProductRefs</code></td><td>把当前 job 的产品素材池列表、识别视角、用途标签、方向、结构点、备注、AI 补图和删除结果保存到 <code>Job.product_refs</code> / <code>state.json</code>。前端上传、识别完成、补角度、编辑备注和删除时都会同步保存；刷新页面或热更新后从 job 恢复，不再要求重新上传和重新识别。</td></tr>
            <tr><td>产品视角识别</td><td><code>POST /jobs/{id}/assets/product-views/analyze</code></td><td><code>analyzeProductViews</code></td><td>读取同一产品素材池，按批次把多张图一次性提交给 <code>PRODUCT_VIEW_MODEL=gpt-image-2</code> 做视角标注，不限制只看前 6 张；识别对象被固定为套在脖子上的 U 形肩颈按摩仪。返回 <code>view</code>、<code>background</code>、<code>use_tags</code>、<code>orientation</code>、<code>landmarks</code>、中文备注、生成风险和置信度；<code>orientation</code> 明确佩戴者左/右、上/下、内外侧和开口方向对应图中哪边，避免把图片左右误当产品左右。前端不再要求用户手动选择视角，也不做不同产品身份判断。</td></tr>
-            <tr><td>产品缺角度补图</td><td><code>POST /jobs/{id}/assets/product-angle</code></td><td><code>generateProductAngleAsset</code></td><td>用当前同一产品素材池作为参考，通过 <code>gpt-image-2</code> 自动补全缺失视角，输出新的 <code>ImageRef(kind="asset")</code>。前端不再固定传第一张图，而是按目标视角给已上传/已标注参考图打分，优先选择真实上传图、目标相邻视角、侧厚/触点/底部对应用途标签和低风险高置信图，最多传 6 张；后端通过 <code>/images/edits</code> multipart 的多张 <code>image[]</code> 直接提交给 <code>gpt-image-2</code>，不再把参考图拼成一张板，降低模型误解成拼图/多产品的概率。Prompt 会约束白底产品图、左右非对称、厚度、内侧触点和肩颈真实佩戴比例，并禁止输出拼图/多产品；遇到上游 429 / saturated 会按退避节奏重试，最终仍失败时返回 503 和可读提示。</td></tr>
+            <tr><td>产品缺角度补图</td><td><code>POST /jobs/{id}/assets/product-angle</code></td><td><code>generateProductAngleAsset</code></td><td>用当前同一产品素材池作为参考，通过 <code>gpt-image-2</code> 自动补全缺失视角，输出新的 <code>ImageRef(kind="asset")</code>。前端不再固定传第一张图，而是按目标视角给已上传/已标注参考图打分，优先选择真实上传图、目标相邻视角、侧厚/触点/底部对应用途标签和低风险高置信图，最多传 6 张；后端通过 <code>/images/edits</code> multipart 的多张 <code>image[]</code> 直接提交给 <code>gpt-image-2</code>，不再把参考图拼成一张板，降低模型误解成拼图/多产品的概率。Prompt 会约束白底产品图、左右非对称、厚度、内侧触点和肩颈真实佩戴比例，并禁止输出拼图/多产品；遇到上游 429 / saturated 会按退避节奏重试，最终仍失败时返回 503 和可读提示；遇到 DNS / ConnectError 也返回 503，并提示配置 <code>AI_HTTP_PROXY</code> / <code>IMAGE_HTTP_PROXY</code>。</td></tr>
            <tr><td>角色库</td><td><code>GET /character-library/skg</code></td><td><code>listCharacterLibrary</code></td><td>读取内置 5 个透明骨架人角色 manifest，每个角色含正面、左右 45 度、侧面、背面、半身近景和背部特写 7 张参考图。</td></tr>
            <tr><td>角色图入库到 job</td><td><code>POST /jobs/{id}/assets/character-library</code></td><td><code>copyCharacterLibraryAssets</code></td><td>把所选角色的 7 张参考图复制为当前 job asset，返回 <code>subject_images</code>，产品融合生成视频时作为人物身份参考图提交。</td></tr>
            <tr><td>产品融合引导图</td><td><code>POST /jobs/{id}/product-fusion/guide</code></td><td><code>createProductFusionGuide</code></td><td>旧流程兼容接口：读取产品图和白底人物图，按 <code>product_region</code> 合成位置引导图。当前内置角色 + 产品 + 描述流程不再主动调用它。</td></tr>
@@ -1005,6 +1005,18 @@ ProductRefStateItem {
        <h2>变更记录</h2>
        <p>这个记录不是 git log 的替代品。它记录“产品理解发生了什么变化、影响了哪些源码、你以后描述需求时该怎么说”。后续每次改功能都要补一条。</p>
        <div class="changelog">
+          <article class="change">
+            <header>
+              <h3>2026-05-18 · 生图网关增加显式代理和网络错误提示</h3>
+              <span class="tag violet">API</span>
+              <span class="tag amber">Ops</span>
+            </header>
+            <div class="body">
+              <p><strong>问题：</strong>相似主体高清视图包调用 <code>gpt-image-2</code> 时出现 <code>ConnectError: nodename nor servname provided</code>。排查确认 <code>IMAGE_BASE_URL</code> 配置正确、模型列表可访问，错误发生在连接层；本地 launchd 启动的 API 进程没有稳定继承交互 shell 的代理环境。</p>
+              <p><strong>改动：</strong><code>api/main.py</code> 新增 <code>AI_HTTP_PROXY</code> / <code>IMAGE_HTTP_PROXY</code> 支持，OpenAI-compatible client 和所有图片 <code>/images/edits</code> / 图片 URL 下载统一走 <code>ai_http_client</code>。<code>/health</code> 新增 <code>models.ai_proxy_configured</code>，只显示是否配置代理，不暴露代理地址。图片 DNS、ConnectError、连接拒绝、网络不可达等传输层失败会归类为 503，并提示检查本机网络或在 <code>api/.env</code> 配置代理后重启后端。</p>
+              <p><strong>影响：</strong><code>api/main.py</code>、<code>api/.env.example</code>、<code>deploy/.env.production.example</code>、<code>RULES.md</code>、<code>docs/source-analysis.html</code>。后续如果生图失败但模型名仍是 <code>gpt-image-2</code>，先区分 429 上游饱和、400 参数错误和 503 网络/代理错误，不要直接改 prompt 或换模型。</p>
+            </div>
+          </article>
          <article class="change">
            <header>
              <h3>2026-05-18 · 相似主体升级为内置形象 + 肩颈高清视图包</h3>