auto-save 2026-05-18 07:05 (~8)

2026-05-18 07:06:00 +08:00
parent d72bf62a97
commit 5fde9f3e22
8 changed files with 164 additions and 35 deletions
--- a/api/.env.example
+++ b/api/.env.example
@@ -18,6 +18,7 @@ LOCAL_ASR_MODEL=mlx-community/whisper-tiny
 LOCAL_ASR_TIMEOUT_SECONDS=180
 TRANSLATE_MODEL=gemini-2.5-flash
 REWRITE_MODEL=gemini-2.5-pro
+PRODUCT_VIEW_MODEL=gpt-image-2
 IMAGE_BASE_URL=https://ai.skg.com/ezlink/v1
 IMAGE_API_KEY=
 IMAGE_MODEL=gpt-image-2
--- a/api/main.py
+++ b/api/main.py
@@ -4624,6 +4624,8 @@ class CopyCharacterLibraryAssetReq(BaseModel):

 class GenerateProductAngleAssetReq(BaseModel):
    source_ref: dict
+    source_refs: list[dict] = Field(default_factory=list)
+    source_notes: list[str] = Field(default_factory=list)
    target_view: str
    note: str = ""

@@ -5016,7 +5018,7 @@ def product_view_batch_prompt(indices: list[int]) -> str:


 def analyze_product_view(ref_path: Path, index: int) -> dict:
-    if not LLM_API_KEY:
+    if not (IMAGE_API_KEY if PRODUCT_VIEW_MODEL == GPT_IMAGE_MODEL else LLM_API_KEY):
        return fallback_product_view(index)
    img_b64 = base64.b64encode(ref_path.read_bytes()).decode("ascii")
    prompt = (
@@ -5029,8 +5031,8 @@ def analyze_product_view(ref_path: Path, index: int) -> dict:
        "{\"view\":\"front|left_45|right_45|side_thickness|inner_contacts|back_bottom\",\"background\":\"white|black|simple|complex|unknown\",\"use_tags\":[\"hero_packshot\"],\"orientation\":{\"product_left\":\"图中哪一侧/不可见/不确定\",\"product_right\":\"图中哪一侧/不可见/不确定\",\"top\":\"图中哪一侧/不可见/不确定\",\"bottom\":\"图中哪一侧/不可见/不确定\",\"inner_side\":\"图中哪一侧/是否可见\",\"outer_side\":\"图中哪一侧/是否可见\",\"opening_direction\":\"U形开口朝图中哪一侧/不可见/不确定\"},\"landmarks\":[\"U形开口\"],\"note\":\"中文备注\",\"risk\":\"\",\"confidence\":0.86}."
    )
    try:
-        resp = llm().chat.completions.create(
-            model=VISION_MODEL,
+        resp = product_view_llm().chat.completions.create(
+            model=PRODUCT_VIEW_MODEL,
            messages=[{"role": "user", "content": [
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}},
@@ -5050,7 +5052,7 @@ def analyze_product_view(ref_path: Path, index: int) -> dict:


 def analyze_product_views_batch(paths_by_index: list[tuple[int, Path]]) -> dict[int, dict]:
-    if not LLM_API_KEY:
+    if not (IMAGE_API_KEY if PRODUCT_VIEW_MODEL == GPT_IMAGE_MODEL else LLM_API_KEY):
        return {index: fallback_product_view(index) for index, _path in paths_by_index}
    results: dict[int, dict] = {}
    for start in range(0, len(paths_by_index), PRODUCT_VIEW_BATCH_SIZE):
@@ -5062,8 +5064,8 @@ def analyze_product_views_batch(paths_by_index: list[tuple[int, Path]]) -> dict[
            content.append({"type": "text", "text": f"Image index {index}"})
            content.append({"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}})
        try:
-            resp = llm().chat.completions.create(
-                model=VISION_MODEL,
+            resp = product_view_llm().chat.completions.create(
+                model=PRODUCT_VIEW_MODEL,
                messages=[{"role": "user", "content": content}],
                response_format={"type": "json_object"},
                temperature=0.05,
@@ -5121,18 +5123,68 @@ def analyze_product_views(job_id: str, req: AnalyzeProductViewsReq) -> dict:
    return {"items": items, "missing_views": missing}


+def _make_product_angle_reference_sheet(paths: list[Path], out_path: Path) -> Path:
+    """Tile up to six reference images (unreadable ones are skipped) into one JPEG grid saved at out_path."""
+    cells: list[Image.Image] = []
+    for ref in paths[:6]:
+        try:
+            pic = ImageOps.exif_transpose(Image.open(ref)).convert("RGB")
+            pic.thumbnail((520, 520), Image.Resampling.LANCZOS)
+            tile = Image.new("RGB", (560, 560), (255, 255, 255))
+            tile.paste(pic, ((560 - pic.width) // 2, (560 - pic.height) // 2))  # centre on the white tile
+            cells.append(tile)
+        except Exception:
+            continue  # best effort: drop this reference and keep the rest
+    if not cells:
+        raise RuntimeError("no usable product reference images")
+    cols = min(len(cells), 3)  # one or two tiles share a single row; otherwise three per row
+    sheet = Image.new("RGB", (cols * 560, -(-len(cells) // cols) * 560), (245, 245, 245))  # rows = ceil(n / cols)
+    for n, tile in enumerate(cells):
+        sheet.paste(tile, ((n % cols) * 560, (n // cols) * 560))  # fill left-to-right, top-to-bottom
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    sheet.save(out_path, "JPEG", quality=94)
+    return out_path
+
+
@app.post("/jobs/{job_id}/assets/product-angle")
 def generate_product_angle_asset(job_id: str, req: GenerateProductAngleAssetReq) -> dict:
    if job_id not in JOBS:
        raise HTTPException(404, "job not found")
-    source_path = storyboard_ref_path(job_id, req.source_ref)
-    if not source_path or not source_path.exists():
+    raw_refs = [req.source_ref] + list(req.source_refs or [])
+    source_paths: list[Path] = []
+    seen_paths: set[str] = set()
+    for ref in raw_refs:
+        ref_path = storyboard_ref_path(job_id, ref)
+        if ref_path and ref_path.exists():
+            key = str(ref_path)
+            if key not in seen_paths:
+                seen_paths.add(key)
+                source_paths.append(ref_path)
+        if len(source_paths) >= 6:
+            break
+    if not source_paths:
        raise HTTPException(404, "source product image not found")
+    source_path = source_paths[0]
+    model_src = source_path
+    sheet_tmp: Path | None = None
+    if len(source_paths) > 1:
+        sheet_tmp = job_dir(job_id) / "tmp" / f"product_angle_refs_{uuid.uuid4().hex[:8]}.jpg"
+        model_src = _make_product_angle_reference_sheet(source_paths, sheet_tmp)
    target_view = (req.target_view or "目标视角").strip()
    note = (req.note or "").strip()
+    source_notes = [re.sub(r"\s+", " ", str(item)).strip()[:180] for item in (req.source_notes or []) if str(item).strip()]
+    source_note_clause = (
+        "Uploaded reference notes from the operator/view recognizer: "
+        + " | ".join(source_notes[:6])
+        + ". "
+        if source_notes
+        else ""
+    )
    prompt = (
-        "Use the reference image as the same SKG neck-and-shoulder wearable massage product. "
+        "Use the reference image or reference board as evidence for the same SKG neck-and-shoulder wearable massage product. "
+        "If a reference board is provided, all panels are the same product from uploaded views; do not output a board, collage, or multiple products. "
        f"Generate a clean product-only white-background reference image in this missing view: {target_view}. "
+        f"{source_note_clause}"  # f-string literal: joins the adjacent-literal concatenation (a bare name before the next literal is a SyntaxError)
        "Preserve the exact product identity: white U-shaped wearable neck and shoulder massager that sits around the neck, asymmetric wearer-left and wearer-right details, side buttons, inner metal massage contacts, opening width, material, thickness, curvature, and real shoulder-neck wearing scale. "
        "Use product coordinates: wearer-left/right are the user's body left/right when worn, top is near chin/upper neck, bottom is near collarbone/shoulders, inner side touches skin, outer side is the shell/buttons. "
        "Do not mirror both sides into identical shapes; keep visible left/right asymmetry and believable shoulder-neck wearable proportions. "
@@ -5142,9 +5194,15 @@ def generate_product_angle_asset(job_id: str, req: GenerateProductAngleAssetReq)
    )
    models = [GPT_IMAGE_MODEL]
    try:
-        img_bytes, _mode = _image_edit_call(source_path, prompt, models=models, fallback_text=False, max_attempts=5, max_side=1280)
+        img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=5, max_side=1600)
    except RuntimeError as e:
        raise HTTPException(_image_error_status(e), f"product angle generation failed: {e}")
+    finally:
+        if sheet_tmp and sheet_tmp.exists():
+            try:
+                sheet_tmp.unlink()
+            except OSError:
+                pass
    asset_id = f"product_angle_{uuid.uuid4().hex[:10]}"
    out_path = job_dir(job_id) / "assets" / f"{asset_id}.jpg"
    _normalize_asset_image(img_bytes, out_path, source_path, "1024", "white", square=True, fill_subject=True)