auto-save 2026-05-14 12:20 (~4)
This commit is contained in:
76
api/main.py
76
api/main.py
@@ -1436,16 +1436,24 @@ def _transcribe_gemini_sync(wav: Path) -> list[dict]:
|
||||
"Use English for the transcript. If exact timestamps are uncertain, return one segment "
|
||||
f"from 0 to {duration:.2f} seconds."
|
||||
)
|
||||
resp = llm().chat.completions.create(
|
||||
model=ASR_FALLBACK_MODEL,
|
||||
messages=[{"role": "user", "content": [
|
||||
{"type": "text", "text": prompt},
|
||||
{"type": "input_audio", "input_audio": {"data": audio_b64, "format": "wav"}},
|
||||
]}],
|
||||
temperature=0,
|
||||
)
|
||||
content = (resp.choices[0].message.content or "").strip()
|
||||
return _parse_asr_segments(content, duration)
|
||||
last_error: Exception | None = None
|
||||
for attempt in range(3):
|
||||
try:
|
||||
resp = llm().chat.completions.create(
|
||||
model=ASR_FALLBACK_MODEL,
|
||||
messages=[{"role": "user", "content": [
|
||||
{"type": "text", "text": prompt},
|
||||
{"type": "input_audio", "input_audio": {"data": audio_b64, "format": "wav"}},
|
||||
]}],
|
||||
temperature=0,
|
||||
)
|
||||
content = (resp.choices[0].message.content or "").strip()
|
||||
return _parse_asr_segments(content, duration)
|
||||
except Exception as e:
|
||||
last_error = e
|
||||
if attempt < 2:
|
||||
time.sleep(1.0)
|
||||
raise last_error or RuntimeError("Gemini audio transcription failed")
|
||||
|
||||
|
||||
def _transcribe_sync(wav: Path) -> list[dict]:
|
||||
@@ -1710,7 +1718,17 @@ def pipeline_transcribe(job_id: str, manage_job_status: bool = True) -> None:
|
||||
|
||||
# 1) whisper ASR
|
||||
progress(f"{ASR_MODEL} 转录中…", 78)
|
||||
segments = _transcribe_sync(wav)
|
||||
try:
|
||||
segments = _transcribe_sync(wav)
|
||||
except Exception:
|
||||
if job.transcript:
|
||||
segments = [
|
||||
{"start": seg.start, "end": seg.end, "text": seg.en}
|
||||
for seg in job.transcript
|
||||
if seg.en.strip()
|
||||
]
|
||||
else:
|
||||
raise
|
||||
if not segments:
|
||||
raise RuntimeError("ASR 返回 0 段(可能无人声 / 格式问题)")
|
||||
|
||||
@@ -3699,12 +3717,26 @@ def create_product_fusion_guide(job_id: str, req: ProductFusionShot) -> dict:
|
||||
|
||||
def fallback_product_fusion_descriptions() -> list[str]:
|
||||
return [
|
||||
"透明骨架人双手拿起 SKG 颈部按摩仪,准备戴到脖子上,镜头轻微推近产品。",
|
||||
"透明骨架人把 SKG 按摩仪贴合到肩颈位置,手部轻轻调整两侧机身角度。",
|
||||
"透明骨架人坐在场景中轻按侧边控制区,产品保持真实比例并清晰可见。",
|
||||
"透明骨架人闭眼放松,肩颈从紧绷变舒展,产品佩戴位置稳定不漂移。",
|
||||
"镜头靠近展示 SKG 产品材质、按键和内侧触点,透明骨架人的手部不要遮挡产品主体。",
|
||||
"使用后的放松状态收尾,透明骨架人自然抬头,产品仍保持白色 U 形外观和真实比例。",
|
||||
"清晨卧室柔光里,透明骨架人把白色 SKG 颈部按摩仪轻戴到后颈,微微闭眼露出放松微笑。",
|
||||
"现代客厅沙发旁,透明骨架人双手扶住 SKG 机身两侧,肩线慢慢放低,表情从紧绷变舒适。",
|
||||
"居家办公桌前,透明骨架人轻按 SKG 侧边控制键,颈部骨架区域清晰可见,神情安静享受。",
|
||||
"暖色卧室床边,透明骨架人佩戴 SKG 后轻轻仰头,白色骨架与透明外壳干净明亮,画面高级。",
|
||||
"落地窗自然光下,透明骨架人坐姿端正,SKG 产品贴合后颈,嘴角微扬呈现轻松舒缓状态。",
|
||||
"简洁浴室镜前,透明骨架人用双手调整 SKG 贴合角度,眼神柔和,产品白色机身清楚可辨。",
|
||||
"午后阳台休息区,透明骨架人戴着 SKG 慢慢侧头伸展,肩颈线条舒展,表情舒适而不夸张。",
|
||||
"高端影棚白色背景中,透明骨架人平稳转身展示 SKG 佩戴效果,产品比例真实,轮廓清晰。",
|
||||
"健身后休息长椅上,透明骨架人把 SKG 放上肩颈,呼吸放慢,脸上出现明显放松感。",
|
||||
"办公会议间隙,透明骨架人靠在椅背上佩戴 SKG,轻轻闭眼,画面传达短暂恢复和舒适休息。",
|
||||
"夜晚卧室暖灯下,透明骨架人坐在床沿使用 SKG,肩颈骨架被柔和光线照亮,神情安稳享受。",
|
||||
"城市公寓客厅里,透明骨架人一边看向窗外一边使用 SKG,动作自然,产品贴合不漂移。",
|
||||
"极简桌面场景中,透明骨架人拿起 SKG 靠近颈部,镜头轻推展示产品材质和佩戴准备动作。",
|
||||
"木质休闲椅上,透明骨架人佩戴 SKG 后轻轻呼气,肩部下沉,脸部呈现舒缓满足的微笑。",
|
||||
"白色商业摄影场景里,透明骨架人用指尖轻触 SKG 按键,产品细节清晰,人物状态轻松专业。",
|
||||
"温暖客厅地毯旁,透明骨架人坐姿放松,SKG 稳定贴合后颈,闭眼感受舒适放松的瞬间。",
|
||||
"窗边阅读角落中,透明骨架人戴着 SKG 翻开书页,动作慢而自然,表情平和享受。",
|
||||
"办公室午休场景里,透明骨架人把 SKG 戴稳后靠回椅背,眼睛半闭,颈肩明显放松。",
|
||||
"干净产品广告场景中,透明骨架人轻扶 SKG 两端展示佩戴贴合度,微笑自然,产品不变形。",
|
||||
"收尾特写镜头里,透明骨架人佩戴 SKG 后缓慢抬头微笑,白色骨架清楚,整体干净高级。",
|
||||
]
|
||||
|
||||
|
||||
@@ -3728,8 +3760,8 @@ def generate_product_fusion_descriptions(job_id: str, req: ProductFusionDescript
|
||||
products.append(f"产品角度{len(products) + 1}未填")
|
||||
shot_lines.append(f"{i}. 首帧={first};尾帧={last};产品角度={products[0]} / {products[1]} / {products[2]} / {products[3]};已有描述={shot.action_text or '空'}")
|
||||
prompt = (
|
||||
"你是 SKG 产品短视频分镜导演。请为 6 条产品融合镜头各写一条中文动作描述,"
|
||||
"每条 20-45 字,必须说明透明骨架人在做什么、产品如何佩戴/展示、动作如何从首帧自然过渡到尾帧。"
|
||||
"你是 SKG 产品短视频分镜导演。请写 20 条中文产品融合动作描述,"
|
||||
"每条 35-70 字,必须说明透明骨架人在什么场景下使用产品、产品如何佩戴/展示、脸部如何舒适享受。"
|
||||
"产品是 SKG 白色 U 形颈部/肩颈按摩仪,四张产品角度图是同一产品的身份真源;不要写医疗治疗承诺,不要出现竞品。"
|
||||
"输出 JSON:{\"descriptions\":[\"...\", \"...\"]}。\n\n"
|
||||
+ "\n".join(shot_lines)
|
||||
@@ -3746,9 +3778,9 @@ def generate_product_fusion_descriptions(job_id: str, req: ProductFusionDescript
|
||||
text = resp.choices[0].message.content or ""
|
||||
data = json.loads(text)
|
||||
descriptions = [str(x).strip() for x in data.get("descriptions", []) if str(x).strip()]
|
||||
if len(descriptions) < 6:
|
||||
descriptions = (descriptions + fallback)[:6]
|
||||
return {"descriptions": descriptions[:6], "mode": "llm"}
|
||||
if len(descriptions) < 20:
|
||||
descriptions = (descriptions + fallback)[:20]
|
||||
return {"descriptions": descriptions[:20], "mode": "llm"}
|
||||
except Exception:
|
||||
return {"descriptions": fallback, "mode": "fallback"}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user