auto-save 2026-05-13 00:22 (~4)

This commit is contained in:
2026-05-13 00:23:01 +08:00
parent 2d7c6cc3a6
commit 66fb1444c4
4 changed files with 394 additions and 27 deletions

View File

@@ -524,6 +524,13 @@
"message": "auto-save 2026-05-13 00:11 (~1)",
"hash": "703d94f",
"files_changed": 1
},
{
"ts": "2026-05-13T00:17:29+08:00",
"type": "commit",
"message": "auto-save 2026-05-13 00:17 (~1)",
"hash": "2d7c6cc",
"files_changed": 1
}
]
}

View File

@@ -28,6 +28,7 @@ ASR_MODEL = os.getenv("ASR_MODEL", "whisper-1")
# Model identifiers, each read from the environment variable of the same name
# and falling back to the default given as the second argument.
TRANSLATE_MODEL = os.getenv("TRANSLATE_MODEL", "gemini-2.5-flash")
REWRITE_MODEL = os.getenv("REWRITE_MODEL", "gemini-2.5-pro")
VISION_MODEL = os.getenv("VISION_MODEL", "gemini-2.5-flash")
# Default image-generation model; the generate endpoint uses it when the
# request leaves `model` empty.
IMAGE_MODEL = os.getenv("IMAGE_MODEL", "gemini-3-pro-image-preview")
# OpenAI 客户端OpenAI 兼容网关,含 SKG ezlink
from openai import OpenAI
@@ -52,11 +53,22 @@ JobStatus = Literal[
# Integer read from the KEYFRAME_COUNT environment variable (default 5).
# NOTE(review): presumably the number of key frames extracted per job — usage is not visible here.
KEYFRAME_COUNT = int(os.getenv("KEYFRAME_COUNT", "5"))
class GeneratedImage(BaseModel):
    """One AI-generated image attached to a key frame."""

    id: str  # first 12 hex characters of a uuid4
    prompt: str  # full prompt that was sent to the image model
    model: str  # name of the model used for this generation
    mode: str = "edit"  # "edit" (with reference image) | "text" (text only)
    url: str  # /jobs/{job_id}/frames/{idx}/gen/{id}.jpg
    selected: bool = False  # True when this image is the one picked for its frame
    created_at: float = 0.0  # time.time() value recorded when the image was generated
class KeyFrame(BaseModel):
    """A key frame of a job, with its optional vision description and generated images."""

    index: int
    timestamp: float
    url: str
    description: dict | None = None  # vision model result {scene, objects, style, suggested_prompt}
    generated_images: list[GeneratedImage] = []  # images appended by the generate endpoint
class TranscriptSegment(BaseModel):
@@ -583,6 +595,134 @@ def get_frame(job_id: str, idx: int):
return FileResponse(p, media_type="image/jpeg")
class GenerateReq(BaseModel):
    """Request body for POST /jobs/{job_id}/frames/{idx}/generate."""

    prompt: str
    extra_prompt: str = ""  # appended to `prompt` after ". " when not blank
    model: str = ""  # leave empty to use the IMAGE_MODEL default
    mode: str = "edit"  # "edit": with reference image, "text": text only
@app.post("/jobs/{job_id}/frames/{idx}/generate", response_model=Job)
def generate_image(job_id: str, idx: int, req: GenerateReq) -> Job:
    """Generate a new image for a key frame from a prompt (image-to-image or text-to-image).

    The final prompt is `req.prompt`, followed by ". " and `req.extra_prompt`
    when the latter is not blank. In "edit" mode the key frame JPEG is sent
    along as a base64 data URI; in "text" mode only the prompt is sent. The
    returned base64 image is written to the job's `gen` directory and a
    `GeneratedImage` record is appended to the frame.

    Args:
        job_id: Key into JOBS.
        idx: Index of the key frame within the job.
        req: Prompt, optional extra prompt, model override and mode.

    Returns:
        The job, after the new image has been attached to the frame.

    Raises:
        HTTPException: 404 if the job, the frame or the frame file is missing;
            400 if the prompt is empty or the mode is not "edit"/"text";
            500 if the image request fails or returns no usable image data.
    """
    # Imported before the try block on purpose: a function-level import makes
    # `httpx` a local name, so importing it only inside the "edit" branch left
    # it unbound in "text" mode and `except httpx.HTTPStatusError` itself
    # raised UnboundLocalError whenever the text request failed.
    import base64 as b64lib
    import time as _time

    import httpx

    job = JOBS.get(job_id)
    if not job:
        raise HTTPException(404, "job not found")
    frame = next((f for f in job.frames if f.index == idx), None)
    if not frame:
        raise HTTPException(404, "frame not found")
    frame_path = job_dir(job_id) / "frames" / f"{idx:03d}.jpg"
    if not frame_path.exists():
        raise HTTPException(404, "frame file missing")

    full_prompt = req.prompt.strip()
    extra_prompt = req.extra_prompt.strip()
    if extra_prompt:
        full_prompt = f"{full_prompt}. {extra_prompt}"
    if not full_prompt:
        raise HTTPException(400, "prompt required")
    # Reject unknown modes instead of silently treating them as text-to-image.
    if req.mode not in ("edit", "text"):
        raise HTTPException(400, f"unsupported mode: {req.mode}")

    model = req.model or IMAGE_MODEL
    gen_id = uuid.uuid4().hex[:12]

    try:
        if req.mode == "edit":
            # image-to-image: call the generations endpoint with an extra `image` parameter
            img_b64 = b64lib.b64encode(frame_path.read_bytes()).decode("ascii")
            data_uri = f"data:image/jpeg;base64,{img_b64}"
            # The OpenAI SDK does not take an `image` parameter on this call, so use httpx directly
            with httpx.Client(timeout=120) as client:
                r = client.post(
                    f"{LLM_BASE_URL}/images/generations",
                    headers={
                        "Authorization": f"Bearer {LLM_API_KEY}",
                        "Content-Type": "application/json",
                    },
                    json={
                        "model": model,
                        "prompt": full_prompt,
                        "image": data_uri,
                        "n": 1,
                    },
                )
                r.raise_for_status()
                resp_data = r.json()
        else:
            # text-only
            resp = llm().images.generate(model=model, prompt=full_prompt, n=1)
            resp_data = resp.model_dump() if hasattr(resp, "model_dump") else {"data": [{"b64_json": resp.data[0].b64_json}]}
    except httpx.HTTPStatusError as e:
        raise HTTPException(500, f"image gen HTTP {e.response.status_code}: {e.response.text[:300]}") from e
    except Exception as e:
        raise HTTPException(500, f"image gen failed: {e}") from e

    data_arr = resp_data.get("data", [])
    if not data_arr:
        raise HTTPException(500, "image gen returned no data")
    item = data_arr[0]
    b64 = item.get("b64_json")
    if not b64:
        raise HTTPException(500, "image gen returned no b64_json")
    try:
        image_bytes = b64lib.b64decode(b64)
    except ValueError as e:
        # binascii.Error (bad padding etc.) is a ValueError subclass.
        raise HTTPException(500, f"image gen returned invalid base64: {e}") from e

    # Save locally as jobs/<id>/gen/<idx>_<gen_id>.jpg
    gen_dir = job_dir(job_id) / "gen"
    gen_dir.mkdir(parents=True, exist_ok=True)
    out_path = gen_dir / f"{idx:03d}_{gen_id}.jpg"
    out_path.write_bytes(image_bytes)

    new_gen = GeneratedImage(
        id=gen_id,
        prompt=full_prompt,
        model=model,
        mode=req.mode,
        url=f"/jobs/{job_id}/frames/{idx}/gen/{gen_id}.jpg",
        selected=False,
        created_at=_time.time(),
    )
    # Write the new record back onto the matching frame(s) of the job
    for f in job.frames:
        if f.index == idx:
            f.generated_images = f.generated_images + [new_gen]
    update(job, frames=job.frames, message=f"生图完成 · 分镜 {idx + 1}")
    return job
@app.get("/jobs/{job_id}/frames/{idx}/gen/{gen_id}.jpg")
def get_generated_image(job_id: str, idx: int, gen_id: str):
    """Serve a stored generated image for a frame as JPEG; respond 404 when the file is absent."""
    image_path = job_dir(job_id) / "gen" / f"{idx:03d}_{gen_id}.jpg"
    if image_path.exists():
        return FileResponse(image_path, media_type="image/jpeg")
    raise HTTPException(404, "generated image not found")
class SelectGenReq(BaseModel):
    """Request body for the generated-image select endpoint."""

    selected: bool  # desired selection state of the targeted image
@app.post("/jobs/{job_id}/frames/{idx}/gen/{gen_id}/select", response_model=Job)
def select_generated(job_id: str, idx: int, gen_id: str, req: SelectGenReq) -> Job:
    """Set the selection state of one generated image of a frame (single-select).

    The targeted image takes `req.selected`; every other generated image of
    the same frame is deselected, so at most one image per frame is selected.

    Args:
        job_id: Key into JOBS.
        idx: Index of the key frame within the job.
        gen_id: Id of the generated image to (de)select.
        req: Desired selection state.

    Returns:
        The job, after the frame's selection flags have been updated.

    Raises:
        HTTPException: 404 if the job, the frame or the generated image does
            not exist.
    """
    job = JOBS.get(job_id)
    if not job:
        raise HTTPException(404, "job not found")
    frame = next((f for f in job.frames if f.index == idx), None)
    if frame is None:
        raise HTTPException(404, "frame not found")
    # Validate before mutating: an unknown gen_id used to fall through the
    # loop's else-branch for every image and silently clear the selection.
    if not any(g.id == gen_id for g in frame.generated_images):
        raise HTTPException(404, "generated image not found")
    for g in frame.generated_images:
        # Single-select: only one image per frame may be selected
        g.selected = req.selected if g.id == gen_id else False
    update(job, frames=job.frames)
    return job
@app.post("/jobs/{job_id}/frames/{idx}/describe", response_model=Job)
def describe_frame(job_id: str, idx: int) -> Job:
"""调 vision 模型识别该关键帧,返回结构化描述。"""

View File

@@ -6,9 +6,10 @@ import {
Mic, Languages, FileEdit, Sparkles, Film, FileVideo, Loader2, Plus, Check,
ChevronDown, X,
} from "lucide-react"
import { type Job, frameUrl, videoUrl } from "@/lib/api"
import { type Job, type KeyFrame, frameUrl, videoUrl, generateImage, selectGenerated, generatedImageUrl } from "@/lib/api"
import { type NodeData } from "@/components/nodes"
import { FrameLightbox } from "@/components/lightbox"
import { toast } from "sonner"
type ColType = "input" | "process" | "ai" | "output"
const TYPE_GRAD: Record<ColType, string> = {
@@ -518,34 +519,19 @@ export function Dashboard({ data }: Props) {
</>
)}
{/* ---- ImageGen — Kanban ---- */}
{/* ---- ImageGen — 选中关键帧每张一卡,生成 + 多版本 ---- */}
{key === "imagegen" && (
<>
<KanbanCard tone="rose" tags={["推荐"]} title="nano-banana-pro">
<div className="text-[11px] text-[var(--text-soft)]">Gemini 3 Pro Image · SKG /v1/images/generations</div>
data.selectedFrames.size === 0 ? (
<KanbanCard tone="pink" tags={["待启动"]} title="未选关键帧">
<div className="text-[11px] text-[var(--text-soft)]"> 1+ 1 </div>
</KanbanCard>
<KanbanCard tone="rose" tags={["备选"]} title="gpt-image-2">
<div className="text-[11px] text-[var(--text-soft)]">OpenAI · SKG </div>
</KanbanCard>
{data.selectedFrames.size === 0 ? (
<KanbanCard tone="pink" tags={["待启动"]} title="未选关键帧">
<div className="text-[11px] text-[var(--text-soft)]"> 1+ 1 </div>
</KanbanCard>
) : (
Array.from({ length: data.selectedFrames.size }).map((_, i) => (
<KanbanCard
key={i}
tone="pink"
tags={[`生成图 ${i + 1}`]}
title={`分镜 ${i + 1} → AI 图`}
>
<div className="aspect-video bg-black/40 rounded-md flex items-center justify-center text-[11px] text-[var(--text-faint)]">
</div>
</KanbanCard>
))
)}
</>
) : (
Array.from(data.selectedFrames).sort((a, b) => a - b).map((frameIdx) => {
const f = data.job?.frames.find((x) => x.index === frameIdx)
if (!f || !data.job) return null
return <ImageGenCard key={frameIdx} job={data.job} frame={f} onJobUpdate={data.onJobUpdate} />
})
)
)}
{/* ---- VideoGen — Kanban ---- */}
@@ -584,3 +570,190 @@ export function Dashboard({ data }: Props) {
)
}
}
/* ============================================================
   ImageGenCard — image-generation card for a single key frame
   ============================================================ */
/**
 * Card that lets the user generate AI images for one key frame and pick one
 * of the generated versions.
 *
 * The base prompt starts from the frame's `suggested_prompt` when the frame
 * has already been described and is empty otherwise; the "not yet recognised"
 * hint is shown only as the textarea placeholder, so it can never be sent to
 * the image model as if it were a prompt.
 *
 * @param job - Job that owns the frame; its id is used for all API calls.
 * @param frame - Key frame this card generates images for.
 * @param onJobUpdate - Called with the job returned by the API after a
 *   successful generate or select call.
 */
function ImageGenCard({ job, frame, onJobUpdate }: {
  job: Job
  frame: KeyFrame
  onJobUpdate: (j: Job) => void
}) {
  const [extra, setExtra] = useState("")
  const [model, setModel] = useState("gemini-3-pro-image-preview")
  const [mode, setMode] = useState<"edit" | "text">("edit")
  const [generating, setGenerating] = useState(false)
  // Empty until the frame has been described: the empty-prompt guard below and
  // the disabled state of the generate button rely on this.
  const suggestedPrompt = frame.description?.suggested_prompt ?? ""
  const promptHint = "(尚未识别 · 点关键帧打开 lightbox 先识别)"
  const [editablePrompt, setEditablePrompt] = useState(suggestedPrompt)
  const [showPrompt, setShowPrompt] = useState(false)

  // Once vision recognition delivers a suggested prompt, adopt it as the default prompt
  useEffect(() => {
    if (suggestedPrompt) {
      setEditablePrompt(suggestedPrompt)
    }
  }, [suggestedPrompt])

  const handleGenerate = async () => {
    if (!editablePrompt.trim()) {
      toast.error("请先填写 prompt点上方关键帧识别会自动生成")
      return
    }
    setGenerating(true)
    try {
      const updated = await generateImage(job.id, frame.index, {
        prompt: editablePrompt,
        extra_prompt: extra,
        model,
        mode,
      })
      onJobUpdate(updated)
      toast.success(`分镜 ${frame.index + 1} 生成完成`)
    } catch (e) {
      toast.error("生图失败:" + (e instanceof Error ? e.message : String(e)))
    } finally {
      setGenerating(false)
    }
  }

  // Toggles the selection of one generated image; the request asks for the opposite of its current state
  const handleSelectGen = async (genId: string, currentlySelected: boolean) => {
    try {
      const updated = await selectGenerated(job.id, frame.index, genId, !currentlySelected)
      onJobUpdate(updated)
    } catch (e) {
      toast.error("选用失败:" + (e instanceof Error ? e.message : String(e)))
    }
  }

  const gens = frame.generated_images ?? []
  const objects = frame.description?.objects ?? []

  return (
    <KanbanCard tone="rose" tags={[`分镜 ${frame.index + 1}`, `${frame.timestamp.toFixed(1)}s`]} title="生成参考图">
      {/* Reference frame + recognised-object chips */}
      <div className="flex gap-2 items-start mt-1">
        <img
          src={frameUrl(job.id, frame.index)}
          alt={`frame ${frame.index}`}
          className="rounded-md object-cover flex-shrink-0"
          style={{ width: 96, aspectRatio: `${job.width}/${job.height}` }}
        />
        <div className="flex-1 min-w-0">
          <div className="text-[10px] text-[var(--text-faint)] uppercase tracking-widest mb-1"></div>
          {objects.length > 0 ? (
            <div className="flex flex-wrap gap-1">
              {/* Only the first six objects are offered; clicking one appends its name to the extra instructions */}
              {objects.slice(0, 6).map((o, i) => (
                <button
                  key={i}
                  onClick={() => setExtra((p) => p ? `${p}, ${o.name}` : o.name)}
                  className="text-[10px] px-1.5 py-0.5 rounded-full bg-white/[0.06] hover:bg-white/[0.12] border border-white/15 text-[var(--text-strong)]"
                  title="点击加入需求"
                >
                  + {o.name}
                </button>
              ))}
            </div>
          ) : (
            <div className="text-[10px] text-[var(--text-faint)] italic"></div>
          )}
        </div>
      </div>

      {/* Base prompt (expandable, editable) */}
      <div className="mt-2.5">
        <button
          type="button"
          onClick={() => setShowPrompt((v) => !v)}
          className="w-full text-[10px] text-[var(--text-faint)] hover:text-[var(--text-strong)] inline-flex items-center justify-between"
        >
          <span> prompt {showPrompt ? "▼" : "▶"}</span>
          <span className="font-mono">{editablePrompt.length} </span>
        </button>
        {showPrompt && (
          <textarea
            value={editablePrompt}
            onChange={(e) => setEditablePrompt(e.target.value)}
            rows={3}
            placeholder={promptHint}
            className="mt-1 w-full text-[11px] px-2 py-1.5 rounded-md bg-black/30 border border-white/10 text-[var(--text-strong)] resize-none focus:ring-1 focus:ring-rose-400/40 outline-none font-mono"
          />
        )}
      </div>

      {/* Extra instructions from the user */}
      <div className="mt-2">
        <div className="text-[10px] text-[var(--text-faint)] uppercase tracking-widest mb-1"></div>
        <textarea
          value={extra}
          onChange={(e) => setExtra(e.target.value)}
          rows={2}
          placeholder="例:加 SKG logo、换实验室背景、删水印"
          className="w-full text-[11.5px] px-2 py-1.5 rounded-md bg-black/30 border border-white/15 text-[var(--text-strong)] placeholder:text-[var(--text-faint)] resize-none focus:ring-2 focus:ring-rose-400/40 outline-none"
        />
      </div>

      {/* Model + mode + generate */}
      <div className="mt-2 flex gap-1.5 items-center">
        <select
          value={model}
          onChange={(e) => setModel(e.target.value)}
          className="flex-1 text-[10.5px] px-2 py-1.5 rounded-md bg-black/40 border border-white/15 text-[var(--text-strong)]"
        >
          <option value="gemini-3-pro-image-preview">nano-banana-pro</option>
          <option value="gemini-3.1-flash-image-preview">gemini-3.1-flash-image</option>
          <option value="gemini-2.5-flash-image">gemini-2.5-flash-image</option>
        </select>
        <select
          value={mode}
          onChange={(e) => setMode(e.target.value as "edit" | "text")}
          className="text-[10.5px] px-2 py-1.5 rounded-md bg-black/40 border border-white/15 text-[var(--text-strong)]"
        >
          <option value="edit">image-to-image</option>
          <option value="text">text-only</option>
        </select>
      </div>

      <button
        onClick={handleGenerate}
        disabled={generating || !editablePrompt.trim()}
        className="mt-2 w-full text-[12px] py-2 rounded-md bg-gradient-to-r from-rose-500 to-pink-500 text-white hover:opacity-95 disabled:opacity-40 inline-flex items-center justify-center gap-1.5 font-semibold"
      >
        {generating ? <Loader2 className="h-3.5 w-3.5 animate-spin" /> : <Sparkles className="h-3.5 w-3.5" />}
        {generating ? "生成中…(约 5-15 秒)" : `⚡ 生成 1 张${gens.length > 0 ? "(再来一张)" : ""}`}
      </button>

      {/* Grid of generated results */}
      {gens.length > 0 && (
        <div className="mt-2.5">
          <div className="text-[10px] text-[var(--text-faint)] uppercase tracking-widest mb-1.5">
            ({gens.length})
          </div>
          <div className="grid grid-cols-3 gap-1.5">
            {gens.map((g) => (
              <button
                key={g.id}
                onClick={() => handleSelectGen(g.id, g.selected)}
                title={`${g.mode} · ${g.model}\n${g.prompt}\n${g.selected ? "已选用(点取消)" : "点击选用"}`}
                className={`relative aspect-square rounded-md overflow-hidden border-2 transition ${
                  g.selected
                    ? "border-emerald-400 ring-2 ring-emerald-400/40"
                    : "border-white/15 hover:border-white/40"
                }`}
              >
                <img
                  src={generatedImageUrl(job.id, frame.index, g.id)}
                  alt={`gen ${g.id}`}
                  className="absolute inset-0 w-full h-full object-cover"
                />
                {g.selected && (
                  <div className="absolute top-1 right-1 bg-emerald-500 text-white rounded-full p-0.5">
                    <Check className="h-2.5 w-2.5" />
                  </div>
                )}
              </button>
            ))}
          </div>
        </div>
      )}
    </KanbanCard>
  )
}

View File

@@ -24,11 +24,22 @@ export interface FrameDescription {
suggested_prompt?: string
}
/** One AI-generated image attached to a key frame, as returned by the backend. */
export interface GeneratedImage {
  /** Identifier of the generated image; used in the select and image URLs. */
  id: string
  /** Full prompt the image was generated from. */
  prompt: string
  /** Name of the model that produced the image. */
  model: string
  /** Generation mode; `generateImage` in this file sends "edit" or "text". */
  mode: string
  /** Server path of the image, of the form /jobs/{job_id}/frames/{idx}/gen/{id}.jpg. */
  url: string
  /** Whether this image is the one currently picked for its frame. */
  selected: boolean
  /** Creation time as recorded by the backend (a `time.time()` value, i.e. Unix seconds). */
  created_at: number
}
/** A key frame of a job, with its optional vision description and generated images. */
export interface KeyFrame {
  /** Frame index within the job; used to build frame and generation URLs. */
  index: number
  /** Position of the frame in the video; the UI renders it with an "s" suffix. */
  timestamp: number
  url: string
  /** Vision-model description; absent or null until the frame has been described. */
  description?: FrameDescription | null
  /** Images generated for this frame; may be missing when none have been generated. */
  generated_images?: GeneratedImage[]
}
export interface TranscriptSegment {
@@ -117,6 +128,42 @@ export async function describeFrame(jobId: string, frameIdx: number): Promise<Jo
return res.json()
}
/**
 * Ask the backend to generate one image for a key frame.
 *
 * @param jobId - Id of the job that owns the frame.
 * @param frameIdx - Index of the key frame.
 * @param body - Prompt plus optional extra prompt, model and mode.
 * @returns The job as returned by the backend after generation.
 * @throws Error with the HTTP status and up to 300 characters of the response
 *   body when the response is not OK.
 */
export async function generateImage(
  jobId: string,
  frameIdx: number,
  body: { prompt: string; extra_prompt?: string; model?: string; mode?: "edit" | "text" },
): Promise<Job> {
  const endpoint = `${API_BASE}/jobs/${jobId}/frames/${frameIdx}/generate`
  const response = await fetch(endpoint, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(body),
  })
  if (response.ok) {
    return response.json()
  }
  // Reading the body is best effort; fall back to an empty detail if it fails.
  const detail = await response.text().catch(() => "")
  throw new Error(`generate ${response.status} ${detail.slice(0, 300)}`)
}
/**
 * Set the selection state of one generated image of a key frame.
 *
 * @param jobId - Id of the job that owns the frame.
 * @param frameIdx - Index of the key frame.
 * @param genId - Id of the generated image to select or deselect.
 * @param selected - Desired selection state.
 * @returns The job as returned by the backend after the update.
 * @throws Error with the HTTP status and up to 300 characters of the response
 *   body when the response is not OK (same format as `generateImage`).
 */
export async function selectGenerated(
  jobId: string,
  frameIdx: number,
  genId: string,
  selected: boolean,
): Promise<Job> {
  const res = await fetch(`${API_BASE}/jobs/${jobId}/frames/${frameIdx}/gen/${genId}/select`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ selected }),
  })
  if (!res.ok) {
    // Include the server's error detail so the caller's toast says why it failed.
    const txt = await res.text().catch(() => "")
    throw new Error(`select ${res.status} ${txt.slice(0, 300)}`)
  }
  return res.json()
}
/** Build the URL of a generated image for the given job, frame index and generation id. */
export function generatedImageUrl(jobId: string, frameIdx: number, genId: string): string {
  const segments = [API_BASE, "jobs", jobId, "frames", frameIdx, "gen", `${genId}.jpg`]
  return segments.join("/")
}
/** Build the URL of a key frame's JPEG for the given job and frame index. */
export function frameUrl(jobId: string, frameIndex: number): string {
  const segments = [API_BASE, "jobs", jobId, "frames", `${frameIndex}.jpg`]
  return segments.join("/")
}