auto-save 2026-05-14 12:20 (~4)

This commit is contained in:
2026-05-14 12:20:57 +08:00
parent e9e0ca8f42
commit df6f0c3bc4
4 changed files with 120 additions and 56 deletions

View File

@@ -1436,16 +1436,24 @@ def _transcribe_gemini_sync(wav: Path) -> list[dict]:
"Use English for the transcript. If exact timestamps are uncertain, return one segment "
f"from 0 to {duration:.2f} seconds."
)
resp = llm().chat.completions.create(
model=ASR_FALLBACK_MODEL,
messages=[{"role": "user", "content": [
{"type": "text", "text": prompt},
{"type": "input_audio", "input_audio": {"data": audio_b64, "format": "wav"}},
]}],
temperature=0,
)
content = (resp.choices[0].message.content or "").strip()
return _parse_asr_segments(content, duration)
last_error: Exception | None = None
for attempt in range(3):
try:
resp = llm().chat.completions.create(
model=ASR_FALLBACK_MODEL,
messages=[{"role": "user", "content": [
{"type": "text", "text": prompt},
{"type": "input_audio", "input_audio": {"data": audio_b64, "format": "wav"}},
]}],
temperature=0,
)
content = (resp.choices[0].message.content or "").strip()
return _parse_asr_segments(content, duration)
except Exception as e:
last_error = e
if attempt < 2:
time.sleep(1.0)
raise last_error or RuntimeError("Gemini audio transcription failed")
def _transcribe_sync(wav: Path) -> list[dict]:
@@ -1710,7 +1718,17 @@ def pipeline_transcribe(job_id: str, manage_job_status: bool = True) -> None:
# 1) whisper ASR
progress(f"{ASR_MODEL} 转录中…", 78)
segments = _transcribe_sync(wav)
try:
segments = _transcribe_sync(wav)
except Exception:
if job.transcript:
segments = [
{"start": seg.start, "end": seg.end, "text": seg.en}
for seg in job.transcript
if seg.en.strip()
]
else:
raise
if not segments:
raise RuntimeError("ASR 返回 0 段(可能无人声 / 格式问题)")
@@ -3699,12 +3717,26 @@ def create_product_fusion_guide(job_id: str, req: ProductFusionShot) -> dict:
def fallback_product_fusion_descriptions() -> list[str]:
    """Return the canned Chinese action descriptions for product-fusion shots.

    Each entry is one ready-to-use shot description featuring the transparent
    skeleton character and the SKG neck massager. A new list is built on every
    call, so callers may slice or mutate the result freely.

    Returns:
        The hard-coded descriptions, in their declared order.
    """
    # NOTE(review): the first six entries look like an older, shorter list that
    # is shown here next to its twenty-entry replacement -- confirm whether
    # they should still be returned.
    return [
        "透明骨架人双手拿起 SKG 颈部按摩仪,准备戴到脖子上,镜头轻微推近产品",
        "透明骨架人 SKG 按摩仪贴合到肩颈位置,手部轻轻调整两侧机身角度",
        "透明骨架人坐在场景中轻按侧边控制区,产品保持真实比例并清晰可见",
        "透明骨架人闭眼放松,肩颈从紧绷变舒展,产品佩戴位置稳定不漂移",
        "镜头靠近展示 SKG 产品材质、按键和内侧触点,透明骨架人的手部不要遮挡产品主体",
        "使用后的放松状态收尾,透明骨架人自然抬头,产品仍保持白色 U 形外观和真实比例",
        "清晨卧室柔光里,透明骨架人把白色 SKG 颈部按摩仪轻戴到后颈,微微闭眼露出放松微笑",
        "现代客厅沙发旁,透明骨架人双手扶住 SKG 机身两侧,肩线慢慢放低,表情从紧绷变舒适",
        "居家办公桌前,透明骨架人轻按 SKG 侧边控制键,颈部骨架区域清晰可见,神情安静享受",
        "暖色卧室床边,透明骨架人佩戴 SKG 后轻轻仰头,白色骨架与透明外壳干净明亮,画面高级",
        "落地窗自然光下透明骨架人坐姿端正SKG 产品贴合后颈,嘴角微扬呈现轻松舒缓状态",
        "简洁浴室镜前,透明骨架人用双手调整 SKG 贴合角度,眼神柔和,产品白色机身清楚可辨",
        "午后阳台休息区,透明骨架人戴着 SKG 慢慢侧头伸展,肩颈线条舒展,表情舒适而不夸张。",
        "高端影棚白色背景中,透明骨架人平稳转身展示 SKG 佩戴效果,产品比例真实,轮廓清晰。",
        "健身后休息长椅上,透明骨架人把 SKG 放上肩颈,呼吸放慢,脸上出现明显放松感。",
        "办公会议间隙,透明骨架人靠在椅背上佩戴 SKG轻轻闭眼画面传达短暂恢复和舒适休息。",
        "夜晚卧室暖灯下,透明骨架人坐在床沿使用 SKG肩颈骨架被柔和光线照亮神情安稳享受。",
        "城市公寓客厅里,透明骨架人一边看向窗外一边使用 SKG动作自然产品贴合不漂移。",
        "极简桌面场景中,透明骨架人拿起 SKG 靠近颈部,镜头轻推展示产品材质和佩戴准备动作。",
        "木质休闲椅上,透明骨架人佩戴 SKG 后轻轻呼气,肩部下沉,脸部呈现舒缓满足的微笑。",
        "白色商业摄影场景里,透明骨架人用指尖轻触 SKG 按键,产品细节清晰,人物状态轻松专业。",
        "温暖客厅地毯旁透明骨架人坐姿放松SKG 稳定贴合后颈,闭眼感受舒适放松的瞬间。",
        "窗边阅读角落中,透明骨架人戴着 SKG 翻开书页,动作慢而自然,表情平和享受。",
        "办公室午休场景里,透明骨架人把 SKG 戴稳后靠回椅背,眼睛半闭,颈肩明显放松。",
        "干净产品广告场景中,透明骨架人轻扶 SKG 两端展示佩戴贴合度,微笑自然,产品不变形。",
        "收尾特写镜头里,透明骨架人佩戴 SKG 后缓慢抬头微笑,白色骨架清楚,整体干净高级。",
    ]
@@ -3728,8 +3760,8 @@ def generate_product_fusion_descriptions(job_id: str, req: ProductFusionDescript
products.append(f"产品角度{len(products) + 1}未填")
shot_lines.append(f"{i}. 首帧={first};尾帧={last};产品角度={products[0]} / {products[1]} / {products[2]} / {products[3]};已有描述={shot.action_text or ''}")
prompt = (
"你是 SKG 产品短视频分镜导演。请为 6 条产品融合镜头各写一条中文动作描述,"
"每条 20-45 字,必须说明透明骨架人在什么、产品如何佩戴/展示、动作如何从首帧自然过渡到尾帧"
"你是 SKG 产品短视频分镜导演。请写 20 条中文产品融合动作描述,"
"每条 35-70 字,必须说明透明骨架人在什么场景下使用产品、产品如何佩戴/展示、脸部如何舒适享受"
"产品是 SKG 白色 U 形颈部/肩颈按摩仪,四张产品角度图是同一产品的身份真源;不要写医疗治疗承诺,不要出现竞品。"
"输出 JSON{\"descriptions\":[\"...\", \"...\"]}。\n\n"
+ "\n".join(shot_lines)
@@ -3746,9 +3778,9 @@ def generate_product_fusion_descriptions(job_id: str, req: ProductFusionDescript
text = resp.choices[0].message.content or ""
data = json.loads(text)
descriptions = [str(x).strip() for x in data.get("descriptions", []) if str(x).strip()]
if len(descriptions) < 6:
descriptions = (descriptions + fallback)[:6]
return {"descriptions": descriptions[:6], "mode": "llm"}
if len(descriptions) < 20:
descriptions = (descriptions + fallback)[:20]
return {"descriptions": descriptions[:20], "mode": "llm"}
except Exception:
return {"descriptions": fallback, "mode": "fallback"}