diff --git a/RULES.md b/RULES.md index 4ddba89..3e96bc3 100644 --- a/RULES.md +++ b/RULES.md @@ -73,6 +73,7 @@ - `AUDIO_PRODUCT_BRIEF`:音频口播改写时注入的 SKG 产品卖点 - `PRODUCT_VIEW_MODEL`:同一产品素材池的视角标注/自动识别模型;当前按项目要求强制使用 `gpt-image-2` - `IMAGE_BASE_URL` / `IMAGE_API_KEY` / `IMAGE_MODEL`:OpenAI 兼容生图网关;当前所有生图入口一律强制使用 `gpt-image-2`,不做其他图片模型 fallback +- `IMAGE_REQUEST_TIMEOUT_SECONDS`:单次图片网关请求超时,默认 60 秒;超时会直接把该视图标失败并继续下一张,避免主体 6 视图整包长时间无反馈 - `GPT_IMAGE_MODEL` / `SUBJECT_ASSET_IMAGE_MODEL` / `SUBJECT_ASSET_IMAGE_MODELS`:保留兼容旧环境变量名,但服务端会强制主体 6 视图和所有其他生图入口都只使用 `gpt-image-2` - `AI_HTTP_PROXY` / `IMAGE_HTTP_PROXY`:可选的 AI 网关出站代理;本地 launchd 后台进程不一定继承 shell 的 `http_proxy/https_proxy`,如生图报 DNS / ConnectError,可在本地 `api/.env` 配置后重启后端。`/health` 只回传是否配置代理,不回传代理地址。 - `YTDLP_COOKIES_FILE` / `YTDLP_COOKIES_FROM_BROWSER`:可选 TikTok 下载登录态;生产云端固定使用 cookies 文件 `/run/secrets/tiktok_cookies.txt`(宿主机 `./secrets/tiktok_cookies.txt` 挂载进容器),本地开发可临时用浏览器 cookies。cookies 文件属于敏感登录态,只能放本机或服务器私有路径,不允许入库。 diff --git a/api/.env.example b/api/.env.example index 2cd984d..6795d19 100644 --- a/api/.env.example +++ b/api/.env.example @@ -24,6 +24,7 @@ PRODUCT_VIEW_MODEL=gpt-image-2 IMAGE_BASE_URL=https://ai.skg.com/ezlink/v1 IMAGE_API_KEY= IMAGE_MODEL=gpt-image-2 +IMAGE_REQUEST_TIMEOUT_SECONDS=60 GPT_IMAGE_MODEL=gpt-image-2 SUBJECT_ASSET_IMAGE_MODEL=gpt-image-2 SUBJECT_ASSET_IMAGE_MODELS=gpt-image-2 diff --git a/api/main.py b/api/main.py index dd73860..3390a38 100644 --- a/api/main.py +++ b/api/main.py @@ -104,6 +104,7 @@ IMAGE_MODEL = GPT_IMAGE_MODEL PRODUCT_VIEW_MODEL = GPT_IMAGE_MODEL SUBJECT_ASSET_IMAGE_MODEL = GPT_IMAGE_MODEL SUBJECT_ASSET_IMAGE_MODELS = [GPT_IMAGE_MODEL] +IMAGE_REQUEST_TIMEOUT_SECONDS = max(15, min(180, int(os.getenv("IMAGE_REQUEST_TIMEOUT_SECONDS", "60")))) PRODUCT_ASSET_MAX_SIDE = max(1024, int(os.getenv("PRODUCT_ASSET_MAX_SIDE", "1600"))) PRODUCT_ASSET_MIN_LONG_SIDE = max(512, int(os.getenv("PRODUCT_ASSET_MIN_LONG_SIDE", "900"))) PRODUCT_ASSET_MIN_SHORT_SIDE = max(320, 
int(os.getenv("PRODUCT_ASSET_MIN_SHORT_SIDE", "600"))) @@ -3516,6 +3517,13 @@ def _image_failure_message(kind: str, attempts: int, last_err: str, capacity_see f"{kind} failed after {attempts} attempts: gpt-image-2 上游负载饱和," f"已自动退避重试仍失败,请稍后点重试。最后错误:{last_err}" ) + if "timeout" in last_err.lower(): + return ( + f"{kind} failed after {attempts} attempts: gpt-image-2 图片网关响应超时" + f"(单次 {IMAGE_REQUEST_TIMEOUT_SECONDS}s),模型未更改。" + f"请检查 {IMAGE_BASE_URL or LLM_BASE_URL or 'image gateway'} 的 gpt-image-2 上游渠道或稍后重试。" + f"最后错误:{last_err}" + ) if _image_is_transport_error(last_err): return ( f"{kind} failed after {attempts} attempts: 图片网关网络/DNS 连接失败," @@ -3542,6 +3550,38 @@ def _image_endpoint(path: str) -> str: return f"{base}/{path.lstrip('/')}" +def _image_generation_response(prompt: str, model: str) -> dict: + with ai_http_client(timeout=IMAGE_REQUEST_TIMEOUT_SECONDS) as client: + r = client.post( + _image_endpoint("/images/generations"), + headers={"Authorization": f"Bearer {IMAGE_API_KEY}"}, + json={"model": model, "prompt": prompt, "n": 1}, + ) + r.raise_for_status() + return r.json() + + +def _image_should_retry( + attempt: int, + total_attempts: int, + status_code: int, + body: str, + last_err: str, + next_mode_changed: bool = False, +) -> bool: + if attempt >= total_attempts - 1: + return False + if next_mode_changed and status_code not in (401, 403): + if status_code == 0 and _image_is_transport_error(last_err): + return False + return True + if status_code in (400, 401, 403, 404): + return False + if status_code == 0 and _image_is_transport_error(last_err): + return False + return True + + def _prepare_image_edit_bytes(image_path: Path, max_side: int) -> bytes: import io as _io from PIL import Image as _PILImage @@ -3590,14 +3630,16 @@ def _image_edit_call( resp_data: dict = {} effective_mode = "edit" capacity_seen = False + attempts_done = 0 for attempt, current_mode in enumerate(plan): + attempts_done = attempt + 1 current_model = models_cycle[min(attempt, 
len(models_cycle) - 1)] status_code = 0 body = "" retry_after: str | None = None try: if current_mode == "edit": - with ai_http_client(timeout=120) as client: + with ai_http_client(timeout=IMAGE_REQUEST_TIMEOUT_SECONDS) as client: r = client.post( _image_endpoint("/images/edits"), headers={ @@ -3616,8 +3658,7 @@ def _image_edit_call( r.raise_for_status() resp_data = r.json() else: - resp = image_llm().images.generate(model=current_model, prompt=prompt, n=1) - resp_data = resp.model_dump() if hasattr(resp, "model_dump") else {"data": [{"b64_json": resp.data[0].b64_json}]} + resp_data = _image_generation_response(prompt, current_model) if resp_data.get("data"): effective_mode = current_mode model = current_model # 记录实际成功的 model @@ -3636,19 +3677,22 @@ def _image_edit_call( except Exception as e: last_err = f"{type(e).__name__}: {e} · model={current_model}" - if attempt < len(plan) - 1: + next_mode_changed = attempt < len(plan) - 1 and plan[attempt + 1] != current_mode + if _image_should_retry(attempt, len(plan), status_code, body, last_err, next_mode_changed): tag = f"retry {attempt + 1}/{len(plan)} → {GPT_IMAGE_MODEL}" delay = _image_retry_delay(attempt, status_code, body, retry_after) print(f"[image edit {tag}, sleep {delay:.0f}s] {last_err}", flush=True) _time.sleep(delay) + else: + break data_arr = resp_data.get("data", []) if not data_arr: - raise RuntimeError(_image_failure_message("image edit", len(plan), last_err, capacity_seen)) + raise RuntimeError(_image_failure_message("image edit", attempts_done, last_err, capacity_seen)) item = data_arr[0] b64 = item.get("b64_json") if not b64 and item.get("url"): - with ai_http_client(timeout=120) as client: + with ai_http_client(timeout=IMAGE_REQUEST_TIMEOUT_SECONDS) as client: image_resp = client.get(item["url"]) image_resp.raise_for_status() return image_resp.content, effective_mode @@ -3666,35 +3710,51 @@ def _image_text_call( """Text-only image generation. 
生图模型强制使用 gpt-image-2。""" import base64 as b64lib import time as _time + import httpx if not IMAGE_API_KEY: raise RuntimeError("IMAGE_API_KEY 或 LLM_API_KEY 未配置") models_cycle = [GPT_IMAGE_MODEL] last_err = "" - resp_data: dict = {} capacity_seen = False + attempts_done = 0 for attempt in range(max_attempts): + attempts_done = attempt + 1 current_model = models_cycle[min(attempt, len(models_cycle) - 1)] status_code = 0 body = "" + retry_after: str | None = None try: - resp = image_llm().images.generate(model=current_model, prompt=prompt, n=1) - resp_data = resp.model_dump() if hasattr(resp, "model_dump") else {"data": [{"b64_json": resp.data[0].b64_json}]} + resp_data = _image_generation_response(prompt, current_model) if resp_data.get("data"): - b64 = resp_data["data"][0].get("b64_json") + item = resp_data["data"][0] + b64 = item.get("b64_json") if b64: return b64lib.b64decode(b64), "text" + if item.get("url"): + with ai_http_client(timeout=IMAGE_REQUEST_TIMEOUT_SECONDS) as client: + image_resp = client.get(item["url"]) + image_resp.raise_for_status() + return image_resp.content, "text" err_obj = resp_data.get("error") or {} last_err = f"empty data · {err_obj.get('code', '')} · {str(err_obj.get('message', ''))[:200]} · model={current_model}" + except httpx.HTTPStatusError as e: + body = e.response.text + status_code = e.response.status_code + retry_after = e.response.headers.get("retry-after") + capacity_seen = capacity_seen or _image_is_capacity_error(status_code, body) + last_err = f"HTTP {status_code}: {body[:200]} · model={current_model}" except Exception as e: last_err = f"{type(e).__name__}: {e} · model={current_model}" body = str(e) status_code = 429 if "429" in body or "saturated" in body.lower() or "饱和" in body else 0 capacity_seen = capacity_seen or _image_is_capacity_error(status_code, body) - if attempt < max_attempts - 1: - delay = _image_retry_delay(attempt, status_code, body) + if _image_should_retry(attempt, max_attempts, status_code, body, 
last_err): + delay = _image_retry_delay(attempt, status_code, body, retry_after) print(f"[image text retry {attempt + 1}/{max_attempts} → {GPT_IMAGE_MODEL}, sleep {delay:.0f}s] {last_err}", flush=True) _time.sleep(delay) - raise RuntimeError(_image_failure_message("image text", max_attempts, last_err, capacity_seen)) + else: + break + raise RuntimeError(_image_failure_message("image text", attempts_done, last_err, capacity_seen)) def _image_path_to_data_url(path: Path) -> str: @@ -4054,6 +4114,7 @@ def health() -> dict: "product_view": PRODUCT_VIEW_MODEL, "image": IMAGE_MODEL, "image_base_url": IMAGE_BASE_URL or LLM_BASE_URL or "openai-default", + "image_request_timeout_seconds": IMAGE_REQUEST_TIMEOUT_SECONDS, "ai_proxy_configured": bool(AI_HTTP_PROXY), "image_fallbacks": [GPT_IMAGE_MODEL], "subject_image": SUBJECT_ASSET_IMAGE_MODEL, @@ -4394,7 +4455,9 @@ def generate_image(job_id: str, idx: int, req: GenerateReq) -> Job: last_err = "" effective_mode = req.mode capacity_seen = False + attempts_done = 0 for attempt, current_mode in enumerate(plan): + attempts_done = attempt + 1 status_code = 0 body = "" retry_after: str | None = None @@ -4402,7 +4465,7 @@ def generate_image(job_id: str, idx: int, req: GenerateReq) -> Job: if current_mode == "edit": if img_bytes_in is None: raise RuntimeError("edit mode reference image missing") - with ai_http_client(timeout=120) as client: + with ai_http_client(timeout=IMAGE_REQUEST_TIMEOUT_SECONDS) as client: r = client.post( _image_endpoint("/images/edits"), headers={ @@ -4415,8 +4478,7 @@ def generate_image(job_id: str, idx: int, req: GenerateReq) -> Job: resp_data = r.json() else: # text-only - resp = image_llm().images.generate(model=model, prompt=full_prompt, n=1) - resp_data = resp.model_dump() if hasattr(resp, "model_dump") else {"data": [{"b64_json": resp.data[0].b64_json}]} + resp_data = _image_generation_response(full_prompt, model) if resp_data.get("data"): effective_mode = current_mode @@ -4442,22 +4504,25 @@ def 
generate_image(job_id: str, idx: int, req: GenerateReq) -> Job: except Exception as e: last_err = f"{type(e).__name__}: {e}" - if attempt < len(plan) - 1: + next_mode_changed = attempt < len(plan) - 1 and plan[attempt + 1] != current_mode + if _image_should_retry(attempt, len(plan), status_code, body, last_err, next_mode_changed): next_mode = plan[attempt + 1] tag = f"fallback → {next_mode}" if next_mode != current_mode else f"retry {attempt + 1}/{len(plan)}" print(f"[image gen {tag}] {last_err}", flush=True) _time.sleep(_image_retry_delay(attempt, status_code, body, retry_after)) + else: + break data_arr = resp_data.get("data", []) if not data_arr: - raise HTTPException(503 if capacity_seen else 500, _image_failure_message("image gen", len(plan), last_err, capacity_seen)) + raise HTTPException(503 if capacity_seen else 500, _image_failure_message("image gen", attempts_done, last_err, capacity_seen)) item = data_arr[0] b64 = item.get("b64_json") if b64: out_bytes = b64lib.b64decode(b64) elif item.get("url"): - with ai_http_client(timeout=120) as client: + with ai_http_client(timeout=IMAGE_REQUEST_TIMEOUT_SECONDS) as client: image_resp = client.get(item["url"]) image_resp.raise_for_status() out_bytes = image_resp.content diff --git a/deploy/.env.production.example b/deploy/.env.production.example index a5162d4..b6ac124 100644 --- a/deploy/.env.production.example +++ b/deploy/.env.production.example @@ -42,6 +42,7 @@ PRODUCT_VIEW_MODEL=gpt-image-2 IMAGE_BASE_URL=https://ai.skg.com/ezlink/v1 IMAGE_API_KEY= IMAGE_MODEL=gpt-image-2 +IMAGE_REQUEST_TIMEOUT_SECONDS=60 GPT_IMAGE_MODEL=gpt-image-2 SUBJECT_ASSET_IMAGE_MODEL=gpt-image-2 SUBJECT_ASSET_IMAGE_MODELS=gpt-image-2 diff --git a/docs/source-analysis.html b/docs/source-analysis.html index 621e27d..0bf4344 100644 --- a/docs/source-analysis.html +++ b/docs/source-analysis.html @@ -986,7 +986,7 @@ ProductRefStateItem { 应用清洗POST /cleanup/applyapplyCleanedFrame物理覆盖 frames/{idx}.jpg,并备份原图。 元素增改删POST/PATCH/DELETE 
/elementsaddElement/updateElement/deleteElement让用户修正 Vision 错误,避免候选结果锁死。 元素提取POST /elements/{element_id}/cutoutcutoutElement调用图像模型生成独立白底素材图,每次累积一张 cutout。 - 主体资产包POST /elements/{element_id}/subject-assets
DELETE /elements/{element_id}/subject-assets/{asset_id}generateSubjectAssets
deleteSubjectAsset根据转换层里的参考帧重新绘制一个统一主体资产包;前端按真人重构、卡通重构、元素重构、自主描述四个方向分别管理 source_frame_indices,每个方向最多 3 张参考帧,固定请求 frontthree_quarter_leftleftbackrightthree_quarter_right 六个视图,不再暴露完整 10 / 常用 4 选择。当前源视频工作区使用 subject_style=source_actor 承接真人、元素和自主描述,使用 subject_style=cartoon_subject 承接卡通重构;旧 transparent_human 仍为兼容类型但不是当前转换层默认入口。reconstruction_mode=similar 是创新路径:后端先用 VISION_MODEL 把关键帧反推成非身份化文字 brief,再调用 gpt-image-2/images/generations 文字生图,日志会显示 image_refs=0;这里是参考重构生成套图,不是抠图、复制或 image-edit 复刻。卡通重构在后端额外加入原创卡通/插画主体约束,明确不输出真实人物复制 likeness。生成完成后,后端会把生成视图反推/写入 KeyElement.subject_consensus_brief,作为后续首尾帧的唯一主体身份文字依据。reconstruction_mode=same 仍保留旧 image-edit 路径,用于确实需要精确复刻且有授权的场景。每个 view 单独调用一次生图,明确禁止多视图拼图、contact sheet、多主体、多面板、标签或对比排版。后端不再要求整包全成功才写入:单个视图失败时会保留已成功生成的主体图,返回“部分生成完成”,只有一张都没生成出来才返回错误。replace_views=true 时会替换同一视角旧图;删除接口会移除对应 subject asset 记录并删除本地 jpg 文件。 + 主体资产包POST /elements/{element_id}/subject-assets
DELETE /elements/{element_id}/subject-assets/{asset_id}generateSubjectAssets
deleteSubjectAsset根据转换层里的参考帧重新绘制一个统一主体资产包;前端按真人重构、卡通重构、元素重构、自主描述四个方向分别管理 source_frame_indices,每个方向最多 3 张参考帧,固定请求 frontthree_quarter_leftleftbackrightthree_quarter_right 六个视图,不再暴露完整 10 / 常用 4 选择。当前源视频工作区使用 subject_style=source_actor 承接真人、元素和自主描述,使用 subject_style=cartoon_subject 承接卡通重构;旧 transparent_human 仍为兼容类型但不是当前转换层默认入口。reconstruction_mode=similar 是创新路径:后端先用 VISION_MODEL 把关键帧反推成非身份化文字 brief,再调用 gpt-image-2/images/generations 文字生图,日志会显示 image_refs=0;这里是参考重构生成套图,不是抠图、复制或 image-edit 复刻。卡通重构在后端额外加入原创卡通/插画主体约束,明确不输出真实人物复制 likeness。生成完成后,后端会把生成视图反推/写入 KeyElement.subject_consensus_brief,作为后续首尾帧的唯一主体身份文字依据。reconstruction_mode=same 仍保留旧 image-edit 路径,用于确实需要精确复刻且有授权的场景。每个 view 单独调用一次生图,明确禁止多视图拼图、contact sheet、多主体、多面板、标签或对比排版。单次图片请求受 IMAGE_REQUEST_TIMEOUT_SECONDS 控制,默认 60 秒;超时、DNS 或连接失败会让当前视图标失败并继续后续视图。后端不再要求整包全成功才写入:单个视图失败时会保留已成功生成的主体图,返回“部分生成完成”,只有一张都没生成出来才返回错误。replace_views=true 时会替换同一视角旧图;删除接口会移除对应 subject asset 记录并删除本地 jpg 文件。 主体套图状态SubjectAsset.status
pack_idweb/app/page.tsx
SourceSubjectPipelinegenerateSubjectAssets 现在先写入同一个 pack_id 下的 queued 占位卡并立即返回,后台按视角逐张生成,单张完成就把该占位替换成 completed 图片。前端轮询会把 queued / in_progress 主体资产纳入运行状态;主体元素区按 pack 显示套图文件夹,点击某个文件夹后展开该套图,其他套图顺位进入下方可滚动列表。 首尾帧资产POST /frames/{idx}/scene-assetgenerateSceneAsset同一接口兼容旧场景图和新首尾帧;当前信息流复刻流程传 asset_role=first_frame/last_framesubject_brief 和最多 1-2 张 product_images。首尾帧不再传主体图、不再把主体图和产品图拼成 contact sheet;主体只走文字 brief,允许新动作、新景别、新表情和新环境。若本条需要产品,后端只把产品参考图作为 gpt-image-2 image-edit 的硬视觉真源;若不需要产品,则走纯文字生图。关键帧只作为行数据承载位置。生成结果保存在 scene_assets,前端再写入 StoryboardScene.first_image/last_image。 产品图库GET /product-library/skglistProductLibrary读取内置 SKG 白底图库 manifest,返回产品标题、品类、尺寸、白底评分和预览图 URL。 @@ -1062,7 +1062,7 @@ ProductRefStateItem {
  • 主体候选确认、改名、删除和主体资产包生成能力保留在底层旧面板和接口中,当前第一步主界面不主动展示。
  • 分镜工作台 4 图槽和改造说明自动保存。
  • 音频文案轨:点击开始或提取音频后提取原文案、中文翻译、讲话人、语速节奏、背景音乐/环境声/音效;结果集中在右侧工作表展示。
  • -
  • GPT Image 生图;当前 IMAGE_MODEL 和主体 6 视图链路默认使用 gpt-image-2
  • +
  • GPT Image 生图;当前 IMAGE_MODEL 和主体 6 视图链路固定使用 gpt-image-2(不做其他图片模型 fallback),单次图片网关请求默认 60 秒超时,超时后该视图标失败并继续后续视图。
  • 三字段分镜候选生成:默认行左侧露文案、场景一句话、人物+产品+动作,右侧直接展示横向视频轨;中文镜像失焦后会自动优化英文主值;支持 AI 改写预览、单条选择数量生成、追加生成、选中候选和整片按行排队提交。
  • 全局资源中心:提示词库和素材库可从顶部“资源库”打开;提示词可复制并计数,素材应用到 job 时会复制成本 job 内普通 asset。
  • @@ -1113,6 +1113,19 @@ ProductRefStateItem {

    变更记录

    这个记录不是 git log 的替代品。它记录“产品理解发生了什么变化、影响了哪些源码、你以后描述需求时该怎么说”。后续每次改功能都要补一条。

    +
    +
    +

    2026-05-19 · gpt-image-2 请求超时改为快速失败

    + API + Reliability + Config +
    +
    +

    问题:gpt-image-2 上游图片网关无响应时,文字生图仍通过 SDK 默认等待,编辑生图也按 120 秒重复尝试;主体 6 视图第一张卡住后,用户侧长时间看不到逐张失败或后续进度。

    +

    改动:api/main.py 新增 IMAGE_REQUEST_TIMEOUT_SECONDS,默认 60 秒,服务端会把取值限制在 15–180 秒之间;_image_text_call 统一改为直接调用 /images/generations,并在响应只返回 url 而没有 b64_json 时按同一超时下载图片;_image_edit_call 和旧分镜生图也复用同一超时。超时、DNS、连接失败这类传输错误不再盲目重试三轮,会把当前视图标失败并继续处理后续视图。/health 通过 image_request_timeout_seconds 字段回传当前图片超时配置。

    +

    影响:这次不改模型,所有图片入口仍固定只使用 gpt-image-2;如果继续失败,错误会明确指向当前 IMAGE_BASE_URL(未配置时回退到 LLM_BASE_URL)上的 gpt-image-2 通道超时或不可用。

    +
    +

    2026-05-19 · 主体元素改为套图文件夹并逐张回填