auto-save 2026-05-18 07:05 (~8)

This commit is contained in:
2026-05-18 07:06:00 +08:00
parent d72bf62a97
commit 5fde9f3e22
8 changed files with 164 additions and 35 deletions

View File

@@ -18,6 +18,7 @@ LOCAL_ASR_MODEL=mlx-community/whisper-tiny
LOCAL_ASR_TIMEOUT_SECONDS=180
TRANSLATE_MODEL=gemini-2.5-flash
REWRITE_MODEL=gemini-2.5-pro
PRODUCT_VIEW_MODEL=gpt-image-2
IMAGE_BASE_URL=https://ai.skg.com/ezlink/v1
IMAGE_API_KEY=
IMAGE_MODEL=gpt-image-2

View File

@@ -4624,6 +4624,8 @@ class CopyCharacterLibraryAssetReq(BaseModel):
class GenerateProductAngleAssetReq(BaseModel):
source_ref: dict
source_refs: list[dict] = Field(default_factory=list)
source_notes: list[str] = Field(default_factory=list)
target_view: str
note: str = ""
@@ -5016,7 +5018,7 @@ def product_view_batch_prompt(indices: list[int]) -> str:
def analyze_product_view(ref_path: Path, index: int) -> dict:
if not LLM_API_KEY:
if not (IMAGE_API_KEY if PRODUCT_VIEW_MODEL == GPT_IMAGE_MODEL else LLM_API_KEY):
return fallback_product_view(index)
img_b64 = base64.b64encode(ref_path.read_bytes()).decode("ascii")
prompt = (
@@ -5029,8 +5031,8 @@ def analyze_product_view(ref_path: Path, index: int) -> dict:
"{\"view\":\"front|left_45|right_45|side_thickness|inner_contacts|back_bottom\",\"background\":\"white|black|simple|complex|unknown\",\"use_tags\":[\"hero_packshot\"],\"orientation\":{\"product_left\":\"图中哪一侧/不可见/不确定\",\"product_right\":\"图中哪一侧/不可见/不确定\",\"top\":\"图中哪一侧/不可见/不确定\",\"bottom\":\"图中哪一侧/不可见/不确定\",\"inner_side\":\"图中哪一侧/是否可见\",\"outer_side\":\"图中哪一侧/是否可见\",\"opening_direction\":\"U形开口朝图中哪一侧/不可见/不确定\"},\"landmarks\":[\"U形开口\"],\"note\":\"中文备注\",\"risk\":\"\",\"confidence\":0.86}."
)
try:
resp = llm().chat.completions.create(
model=VISION_MODEL,
resp = product_view_llm().chat.completions.create(
model=PRODUCT_VIEW_MODEL,
messages=[{"role": "user", "content": [
{"type": "text", "text": prompt},
{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}},
@@ -5050,7 +5052,7 @@ def analyze_product_view(ref_path: Path, index: int) -> dict:
def analyze_product_views_batch(paths_by_index: list[tuple[int, Path]]) -> dict[int, dict]:
if not LLM_API_KEY:
if not (IMAGE_API_KEY if PRODUCT_VIEW_MODEL == GPT_IMAGE_MODEL else LLM_API_KEY):
return {index: fallback_product_view(index) for index, _path in paths_by_index}
results: dict[int, dict] = {}
for start in range(0, len(paths_by_index), PRODUCT_VIEW_BATCH_SIZE):
@@ -5062,8 +5064,8 @@ def analyze_product_views_batch(paths_by_index: list[tuple[int, Path]]) -> dict[
content.append({"type": "text", "text": f"Image index {index}"})
content.append({"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}})
try:
resp = llm().chat.completions.create(
model=VISION_MODEL,
resp = product_view_llm().chat.completions.create(
model=PRODUCT_VIEW_MODEL,
messages=[{"role": "user", "content": content}],
response_format={"type": "json_object"},
temperature=0.05,
@@ -5121,18 +5123,68 @@ def analyze_product_views(job_id: str, req: AnalyzeProductViewsReq) -> dict:
return {"items": items, "missing_views": missing}
def _make_product_angle_reference_sheet(paths: list[Path], out_path: Path) -> Path:
    """Tile up to six reference photos into a single JPEG contact sheet.

    Each readable image is EXIF-rotated, converted to RGB, shrunk to fit a
    520x520 box and centered on a white 560x560 cell. Cells are laid out
    left-to-right, top-to-bottom on a light-grey canvas that is at most three
    columns wide.

    Args:
        paths: Candidate image files; only the first six are considered.
        out_path: Destination of the JPEG sheet. Missing parent directories
            are created.

    Returns:
        ``out_path``, after the sheet has been written.

    Raises:
        RuntimeError: If none of the candidate images could be loaded.
    """
    max_panels = 6
    cell_side = 560   # edge length of one white panel on the sheet
    thumb_side = 520  # bounding box of the photo inside a panel (leaves a margin)

    thumbs: list[Image.Image] = []
    for path in paths[:max_panels]:
        try:
            # The context manager releases the file handle even when EXIF
            # handling or decoding fails; convert() yields an independent,
            # fully loaded image that stays valid after the file is closed.
            with Image.open(path) as src:
                img = ImageOps.exif_transpose(src).convert("RGB")
            img.thumbnail((thumb_side, thumb_side), Image.Resampling.LANCZOS)
            cell = Image.new("RGB", (cell_side, cell_side), (255, 255, 255))
            cell.paste(img, ((cell_side - img.width) // 2, (cell_side - img.height) // 2))
            thumbs.append(cell)
        except Exception:
            # Deliberately best-effort: an unreadable or corrupt reference is
            # skipped so the remaining images can still form a sheet.
            continue
    if not thumbs:
        raise RuntimeError("no usable product reference images")

    # One or two panels share a single row; three or more wrap at three columns.
    cols = 3 if len(thumbs) > 2 else len(thumbs)
    rows = (len(thumbs) + cols - 1) // cols  # ceiling division
    sheet = Image.new("RGB", (cols * cell_side, rows * cell_side), (245, 245, 245))
    for i, thumb in enumerate(thumbs):
        sheet.paste(thumb, ((i % cols) * cell_side, (i // cols) * cell_side))

    out_path.parent.mkdir(parents=True, exist_ok=True)
    sheet.save(out_path, "JPEG", quality=94)
    return out_path
@app.post("/jobs/{job_id}/assets/product-angle")
def generate_product_angle_asset(job_id: str, req: GenerateProductAngleAssetReq) -> dict:
if job_id not in JOBS:
raise HTTPException(404, "job not found")
source_path = storyboard_ref_path(job_id, req.source_ref)
if not source_path or not source_path.exists():
raw_refs = [req.source_ref] + list(req.source_refs or [])
source_paths: list[Path] = []
seen_paths: set[str] = set()
for ref in raw_refs:
ref_path = storyboard_ref_path(job_id, ref)
if ref_path and ref_path.exists():
key = str(ref_path)
if key not in seen_paths:
seen_paths.add(key)
source_paths.append(ref_path)
if len(source_paths) >= 6:
break
if not source_paths:
raise HTTPException(404, "source product image not found")
source_path = source_paths[0]
model_src = source_path
sheet_tmp: Path | None = None
if len(source_paths) > 1:
sheet_tmp = job_dir(job_id) / "tmp" / f"product_angle_refs_{uuid.uuid4().hex[:8]}.jpg"
model_src = _make_product_angle_reference_sheet(source_paths, sheet_tmp)
target_view = (req.target_view or "目标视角").strip()
note = (req.note or "").strip()
source_notes = [re.sub(r"\s+", " ", str(item)).strip()[:180] for item in (req.source_notes or []) if str(item).strip()]
source_note_clause = (
"Uploaded reference notes from the operator/view recognizer: "
+ " | ".join(source_notes[:6])
+ ". "
if source_notes
else ""
)
prompt = (
"Use the reference image as the same SKG neck-and-shoulder wearable massage product. "
"Use the reference image or reference board as evidence for the same SKG neck-and-shoulder wearable massage product. "
"If a reference board is provided, all panels are the same product from uploaded views; do not output a board, collage, or multiple products. "
f"Generate a clean product-only white-background reference image in this missing view: {target_view}. "
+ source_note_clause
"Preserve the exact product identity: white U-shaped wearable neck and shoulder massager that sits around the neck, asymmetric wearer-left and wearer-right details, side buttons, inner metal massage contacts, opening width, material, thickness, curvature, and real shoulder-neck wearing scale. "
"Use product coordinates: wearer-left/right are the user's body left/right when worn, top is near chin/upper neck, bottom is near collarbone/shoulders, inner side touches skin, outer side is the shell/buttons. "
"Do not mirror both sides into identical shapes; keep visible left/right asymmetry and believable shoulder-neck wearable proportions. "
@@ -5142,9 +5194,15 @@ def generate_product_angle_asset(job_id: str, req: GenerateProductAngleAssetReq)
)
models = [GPT_IMAGE_MODEL]
try:
img_bytes, _mode = _image_edit_call(source_path, prompt, models=models, fallback_text=False, max_attempts=5, max_side=1280)
img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=5, max_side=1600)
except RuntimeError as e:
raise HTTPException(_image_error_status(e), f"product angle generation failed: {e}")
finally:
if sheet_tmp and sheet_tmp.exists():
try:
sheet_tmp.unlink()
except OSError:
pass
asset_id = f"product_angle_{uuid.uuid4().hex[:10]}"
out_path = job_dir(job_id) / "assets" / f"{asset_id}.jpg"
_normalize_asset_image(img_bytes, out_path, source_path, "1024", "white", square=True, fill_subject=True)