auto-save 2026-05-13 00:22 (~4)
This commit is contained in:
@@ -524,6 +524,13 @@
|
||||
"message": "auto-save 2026-05-13 00:11 (~1)",
|
||||
"hash": "703d94f",
|
||||
"files_changed": 1
|
||||
},
|
||||
{
|
||||
"ts": "2026-05-13T00:17:29+08:00",
|
||||
"type": "commit",
|
||||
"message": "auto-save 2026-05-13 00:17 (~1)",
|
||||
"hash": "2d7c6cc",
|
||||
"files_changed": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
140
api/main.py
140
api/main.py
@@ -28,6 +28,7 @@ ASR_MODEL = os.getenv("ASR_MODEL", "whisper-1")
|
||||
TRANSLATE_MODEL = os.getenv("TRANSLATE_MODEL", "gemini-2.5-flash")
|
||||
REWRITE_MODEL = os.getenv("REWRITE_MODEL", "gemini-2.5-pro")
|
||||
VISION_MODEL = os.getenv("VISION_MODEL", "gemini-2.5-flash")
|
||||
IMAGE_MODEL = os.getenv("IMAGE_MODEL", "gemini-3-pro-image-preview")
|
||||
|
||||
# OpenAI 客户端(OpenAI 兼容网关,含 SKG ezlink)
|
||||
from openai import OpenAI
|
||||
@@ -52,11 +53,22 @@ JobStatus = Literal[
|
||||
KEYFRAME_COUNT = int(os.getenv("KEYFRAME_COUNT", "5"))
|
||||
|
||||
|
||||
class GeneratedImage(BaseModel):
    """One image generated for a key frame, plus the inputs that produced it."""

    # Short identifier: 12 hex characters taken from a uuid.
    id: str
    # Prompt that was sent to the image model.
    prompt: str
    # Name of the image model that produced the image.
    model: str
    # "edit" (generated with a reference image) or "text" (prompt only).
    mode: str = "edit"
    # Download path, shaped like /jobs/{job_id}/frames/{idx}/gen/{id}.jpg
    url: str
    # Whether this image is currently selected.
    selected: bool = False
    # Creation time as a float timestamp; 0.0 when not set.
    created_at: float = 0.0
|
||||
|
||||
|
||||
class KeyFrame(BaseModel):
    """A key frame together with its recognition result and generated images."""

    index: int
    timestamp: float
    url: str
    # Vision-model recognition result: {scene, objects, style, suggested_prompt}.
    description: dict | None = None
    # Images generated for this frame.
    generated_images: list[GeneratedImage] = []
|
||||
|
||||
|
||||
class TranscriptSegment(BaseModel):
|
||||
@@ -583,6 +595,134 @@ def get_frame(job_id: str, idx: int):
|
||||
return FileResponse(p, media_type="image/jpeg")
|
||||
|
||||
|
||||
class GenerateReq(BaseModel):
    """Request body for generating an image from a key frame."""

    # Base prompt.
    prompt: str
    # Optional extra instructions; empty by default.
    extra_prompt: str = ""
    # Image model name; leave empty to use the IMAGE_MODEL default.
    model: str = ""
    # "edit" uses the key frame as a reference image, "text" uses the prompt only.
    mode: str = "edit"
|
||||
|
||||
|
||||
@app.post("/jobs/{job_id}/frames/{idx}/generate", response_model=Job)
def generate_image(job_id: str, idx: int, req: GenerateReq) -> Job:
    """Generate a new image for a key frame (image-to-image or text-to-image).

    The base prompt and the optional extra prompt are joined into one prompt.
    In "edit" mode the key frame JPEG is sent as a base64 data URI together
    with the prompt; any other mode sends the prompt alone. The returned image
    is written to ``<job dir>/gen/<idx>_<gen_id>.jpg`` and appended to the
    frame's ``generated_images``.

    Returns:
        The updated job.

    Raises:
        HTTPException: 404 if the job, the frame or the frame file is missing;
            400 if the combined prompt is empty; 500 if the upstream call
            fails or does not return a usable base64 image.
    """
    # Function-scope imports, all bound before the try block. httpx must be
    # bound on every path: the except clause below names
    # httpx.HTTPStatusError, and importing it only inside the "edit" branch
    # turned every text-mode failure into an UnboundLocalError.
    import base64 as b64lib
    import time as _time

    import httpx

    job = JOBS.get(job_id)
    if not job:
        raise HTTPException(404, "job not found")
    frame = next((f for f in job.frames if f.index == idx), None)
    if not frame:
        raise HTTPException(404, "frame not found")
    frame_path = job_dir(job_id) / "frames" / f"{idx:03d}.jpg"
    if not frame_path.exists():
        raise HTTPException(404, "frame file missing")

    # Join only the non-empty parts so an empty base prompt combined with an
    # extra prompt does not produce a prompt that starts with ". ".
    prompt_parts = [
        part for part in (req.prompt.strip(), req.extra_prompt.strip()) if part
    ]
    if not prompt_parts:
        raise HTTPException(400, "prompt required")
    full_prompt = ". ".join(prompt_parts)

    model = req.model or IMAGE_MODEL
    gen_id = uuid.uuid4().hex[:12]

    try:
        if req.mode == "edit":
            # Image-to-image: post to the generations endpoint with an extra
            # "image" field. The request is sent with httpx directly because
            # the SDK call used below does not take an image argument.
            img_b64 = b64lib.b64encode(frame_path.read_bytes()).decode("ascii")
            data_uri = f"data:image/jpeg;base64,{img_b64}"
            with httpx.Client(timeout=120) as client:
                r = client.post(
                    f"{LLM_BASE_URL}/images/generations",
                    headers={
                        "Authorization": f"Bearer {LLM_API_KEY}",
                        "Content-Type": "application/json",
                    },
                    json={
                        "model": model,
                        "prompt": full_prompt,
                        "image": data_uri,
                        "n": 1,
                    },
                )
                r.raise_for_status()
                resp_data = r.json()
        else:
            # Text-only generation through the SDK client.
            resp = llm().images.generate(model=model, prompt=full_prompt, n=1)
            resp_data = (
                resp.model_dump()
                if hasattr(resp, "model_dump")
                else {"data": [{"b64_json": resp.data[0].b64_json}]}
            )
    except httpx.HTTPStatusError as e:
        raise HTTPException(
            500, f"image gen HTTP {e.response.status_code}: {e.response.text[:300]}"
        ) from e
    except Exception as e:
        raise HTTPException(500, f"image gen failed: {e}") from e

    data_arr = resp_data.get("data") or []
    if not data_arr:
        raise HTTPException(500, "image gen returned no data")

    b64 = data_arr[0].get("b64_json")
    if not b64:
        raise HTTPException(500, "image gen returned no b64_json")

    # Decode before touching the filesystem so a malformed payload becomes a
    # clear error response instead of an unhandled exception.
    try:
        image_bytes = b64lib.b64decode(b64)
    except (ValueError, TypeError) as e:  # binascii.Error subclasses ValueError
        raise HTTPException(500, "image gen returned invalid b64_json") from e

    # Save locally as <job dir>/gen/<idx>_<gen_id>.jpg
    gen_dir = job_dir(job_id) / "gen"
    gen_dir.mkdir(parents=True, exist_ok=True)
    out_path = gen_dir / f"{idx:03d}_{gen_id}.jpg"
    out_path.write_bytes(image_bytes)

    new_gen = GeneratedImage(
        id=gen_id,
        prompt=full_prompt,
        model=model,
        mode=req.mode,
        url=f"/jobs/{job_id}/frames/{idx}/gen/{gen_id}.jpg",
        selected=False,
        created_at=_time.time(),
    )

    # Write the new image back onto the matching frame(s) of the job.
    for f in job.frames:
        if f.index == idx:
            f.generated_images = f.generated_images + [new_gen]
    update(job, frames=job.frames, message=f"生图完成 · 分镜 {idx + 1}")
    return job
|
||||
|
||||
|
||||
@app.get("/jobs/{job_id}/frames/{idx}/gen/{gen_id}.jpg")
def get_generated_image(job_id: str, idx: int, gen_id: str):
    """Serve a stored generated image as JPEG; respond 404 when the file is absent."""
    image_path = job_dir(job_id) / "gen" / f"{idx:03d}_{gen_id}.jpg"
    if image_path.exists():
        return FileResponse(image_path, media_type="image/jpeg")
    raise HTTPException(404, "generated image not found")
|
||||
|
||||
|
||||
class SelectGenReq(BaseModel):
    """Request body for selecting or deselecting a generated image."""

    # Selection state to apply to the targeted image.
    selected: bool
|
||||
|
||||
|
||||
@app.post("/jobs/{job_id}/frames/{idx}/gen/{gen_id}/select", response_model=Job)
def select_generated(job_id: str, idx: int, gen_id: str, req: SelectGenReq) -> Job:
    """Select or deselect one generated image of a frame.

    Selection is single-choice per frame: the targeted image takes
    ``req.selected`` and every other image of that frame is deselected.

    Returns:
        The updated job.

    Raises:
        HTTPException: 404 if the job, the frame, or the generated image does
            not exist.
    """
    job = JOBS.get(job_id)
    if not job:
        raise HTTPException(404, "job not found")

    frame = next((f for f in job.frames if f.index == idx), None)
    if not frame:
        raise HTTPException(404, "frame not found")

    # Validate the target before mutating anything: with an unknown gen_id the
    # loop below would otherwise clear the frame's existing selection and
    # still report success.
    if not any(g.id == gen_id for g in frame.generated_images):
        raise HTTPException(404, "generated image not found")

    # Single selection: only the targeted image may end up selected.
    for g in frame.generated_images:
        g.selected = req.selected if g.id == gen_id else False

    update(job, frames=job.frames)
    return job
|
||||
|
||||
|
||||
@app.post("/jobs/{job_id}/frames/{idx}/describe", response_model=Job)
|
||||
def describe_frame(job_id: str, idx: int) -> Job:
|
||||
"""调 vision 模型识别该关键帧,返回结构化描述。"""
|
||||
|
||||
@@ -6,9 +6,10 @@ import {
|
||||
Mic, Languages, FileEdit, Sparkles, Film, FileVideo, Loader2, Plus, Check,
|
||||
ChevronDown, X,
|
||||
} from "lucide-react"
|
||||
import { type Job, frameUrl, videoUrl } from "@/lib/api"
|
||||
import { type Job, type KeyFrame, frameUrl, videoUrl, generateImage, selectGenerated, generatedImageUrl } from "@/lib/api"
|
||||
import { type NodeData } from "@/components/nodes"
|
||||
import { FrameLightbox } from "@/components/lightbox"
|
||||
import { toast } from "sonner"
|
||||
|
||||
type ColType = "input" | "process" | "ai" | "output"
|
||||
const TYPE_GRAD: Record<ColType, string> = {
|
||||
@@ -518,34 +519,19 @@ export function Dashboard({ data }: Props) {
|
||||
</>
|
||||
)}
|
||||
|
||||
{/* ---- ImageGen — Kanban ---- */}
|
||||
{/* ---- ImageGen — 选中关键帧每张一卡,生成 + 多版本 ---- */}
|
||||
{key === "imagegen" && (
|
||||
<>
|
||||
<KanbanCard tone="rose" tags={["推荐"]} title="nano-banana-pro">
|
||||
<div className="text-[11px] text-[var(--text-soft)]">Gemini 3 Pro Image · 走 SKG 网关 /v1/images/generations</div>
|
||||
data.selectedFrames.size === 0 ? (
|
||||
<KanbanCard tone="pink" tags={["待启动"]} title="未选关键帧">
|
||||
<div className="text-[11px] text-[var(--text-soft)]">在「关键帧」节点勾选 1+ 张后,每张关键帧 → 1 张生成图</div>
|
||||
</KanbanCard>
|
||||
<KanbanCard tone="rose" tags={["备选"]} title="gpt-image-2">
|
||||
<div className="text-[11px] text-[var(--text-soft)]">OpenAI · SKG 网关待开通</div>
|
||||
</KanbanCard>
|
||||
{data.selectedFrames.size === 0 ? (
|
||||
<KanbanCard tone="pink" tags={["待启动"]} title="未选关键帧">
|
||||
<div className="text-[11px] text-[var(--text-soft)]">在「关键帧」节点勾选 1+ 张后,每张关键帧 → 1 张生成图</div>
|
||||
</KanbanCard>
|
||||
) : (
|
||||
Array.from({ length: data.selectedFrames.size }).map((_, i) => (
|
||||
<KanbanCard
|
||||
key={i}
|
||||
tone="pink"
|
||||
tags={[`生成图 ${i + 1}`]}
|
||||
title={`分镜 ${i + 1} → AI 图`}
|
||||
>
|
||||
<div className="aspect-video bg-black/40 rounded-md flex items-center justify-center text-[11px] text-[var(--text-faint)]">
|
||||
待生成
|
||||
</div>
|
||||
</KanbanCard>
|
||||
))
|
||||
)}
|
||||
</>
|
||||
) : (
|
||||
Array.from(data.selectedFrames).sort((a, b) => a - b).map((frameIdx) => {
|
||||
const f = data.job?.frames.find((x) => x.index === frameIdx)
|
||||
if (!f || !data.job) return null
|
||||
return <ImageGenCard key={frameIdx} job={data.job} frame={f} onJobUpdate={data.onJobUpdate} />
|
||||
})
|
||||
)
|
||||
)}
|
||||
|
||||
{/* ---- VideoGen — Kanban ---- */}
|
||||
@@ -584,3 +570,190 @@ export function Dashboard({ data }: Props) {
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/* ============================================================
|
||||
ImageGenCard — 单张关键帧的生图卡
|
||||
============================================================ */
|
||||
/**
 * Card for one key frame: shows the reference frame and recognized objects,
 * lets the user edit the base prompt and extra instructions, pick a model and
 * mode, trigger generation, and select one of the generated images.
 *
 * Every successful API call hands the updated job to `onJobUpdate`.
 */
function ImageGenCard({ job, frame, onJobUpdate }: {
  job: Job
  frame: KeyFrame
  onJobUpdate: (j: Job) => void
}) {
  const suggestedPrompt = frame.description?.suggested_prompt ?? ""
  const [extra, setExtra] = useState("")
  const [model, setModel] = useState("gemini-3-pro-image-preview")
  const [mode, setMode] = useState<"edit" | "text">("edit")
  const [generating, setGenerating] = useState(false)
  // Start from the suggested prompt, or empty when the frame has not been
  // recognized yet. The "not recognized" hint is rendered as the textarea
  // placeholder instead of being the initial value; as a value it bypassed
  // the empty-prompt guard and was sent to the image model as the prompt.
  const [editablePrompt, setEditablePrompt] = useState(suggestedPrompt)
  const [showPrompt, setShowPrompt] = useState(false)

  // Adopt the suggested prompt once recognition produces (or changes) it.
  useEffect(() => {
    if (suggestedPrompt) {
      setEditablePrompt(suggestedPrompt)
    }
  }, [suggestedPrompt])

  const handleGenerate = async () => {
    if (!editablePrompt.trim()) {
      toast.error("请先填写 prompt(点上方关键帧识别会自动生成)")
      return
    }
    setGenerating(true)
    try {
      const updated = await generateImage(job.id, frame.index, {
        prompt: editablePrompt,
        extra_prompt: extra,
        model,
        mode,
      })
      onJobUpdate(updated)
      toast.success(`分镜 ${frame.index + 1} 生成完成`)
    } catch (e) {
      toast.error("生图失败:" + (e instanceof Error ? e.message : String(e)))
    } finally {
      setGenerating(false)
    }
  }

  // Clicking a generated image toggles its selection.
  const handleSelectGen = async (genId: string, currentlySelected: boolean) => {
    try {
      const updated = await selectGenerated(job.id, frame.index, genId, !currentlySelected)
      onJobUpdate(updated)
    } catch (e) {
      toast.error("选用失败:" + (e instanceof Error ? e.message : String(e)))
    }
  }

  const gens = frame.generated_images ?? []
  const objects = frame.description?.objects ?? []

  return (
    <KanbanCard tone="rose" tags={[`分镜 ${frame.index + 1}`, `${frame.timestamp.toFixed(1)}s`]} title="生成参考图">
      {/* Reference frame + recognized-object chips */}
      <div className="flex gap-2 items-start mt-1">
        <img
          src={frameUrl(job.id, frame.index)}
          alt={`frame ${frame.index}`}
          className="rounded-md object-cover flex-shrink-0"
          style={{ width: 96, aspectRatio: `${job.width}/${job.height}` }}
        />
        <div className="flex-1 min-w-0">
          <div className="text-[10px] text-[var(--text-faint)] uppercase tracking-widest mb-1">识别元素</div>
          {objects.length > 0 ? (
            <div className="flex flex-wrap gap-1">
              {objects.slice(0, 6).map((o, i) => (
                <button
                  type="button"
                  key={i}
                  onClick={() => setExtra((p) => p ? `${p}, ${o.name}` : o.name)}
                  className="text-[10px] px-1.5 py-0.5 rounded-full bg-white/[0.06] hover:bg-white/[0.12] border border-white/15 text-[var(--text-strong)]"
                  title="点击加入需求"
                >
                  + {o.name}
                </button>
              ))}
            </div>
          ) : (
            <div className="text-[10px] text-[var(--text-faint)] italic">未识别(点上方缩略图打开识别)</div>
          )}
        </div>
      </div>

      {/* Base prompt (expandable editor) */}
      <div className="mt-2.5">
        <button
          type="button"
          onClick={() => setShowPrompt((v) => !v)}
          className="w-full text-[10px] text-[var(--text-faint)] hover:text-[var(--text-strong)] inline-flex items-center justify-between"
        >
          <span>基础 prompt {showPrompt ? "▼" : "▶"}</span>
          <span className="font-mono">{editablePrompt.length} 字符</span>
        </button>
        {showPrompt && (
          <textarea
            value={editablePrompt}
            onChange={(e) => setEditablePrompt(e.target.value)}
            rows={3}
            placeholder="(尚未识别 · 点关键帧打开 lightbox 先识别)"
            className="mt-1 w-full text-[11px] px-2 py-1.5 rounded-md bg-black/30 border border-white/10 text-[var(--text-strong)] placeholder:text-[var(--text-faint)] resize-none focus:ring-1 focus:ring-rose-400/40 outline-none font-mono"
          />
        )}
      </div>

      {/* User's extra instructions */}
      <div className="mt-2">
        <div className="text-[10px] text-[var(--text-faint)] uppercase tracking-widest mb-1">我的需求(额外指令)</div>
        <textarea
          value={extra}
          onChange={(e) => setExtra(e.target.value)}
          rows={2}
          placeholder="例:加 SKG logo、换实验室背景、删水印"
          className="w-full text-[11.5px] px-2 py-1.5 rounded-md bg-black/30 border border-white/15 text-[var(--text-strong)] placeholder:text-[var(--text-faint)] resize-none focus:ring-2 focus:ring-rose-400/40 outline-none"
        />
      </div>

      {/* Model + mode + generate */}
      <div className="mt-2 flex gap-1.5 items-center">
        <select
          value={model}
          onChange={(e) => setModel(e.target.value)}
          className="flex-1 text-[10.5px] px-2 py-1.5 rounded-md bg-black/40 border border-white/15 text-[var(--text-strong)]"
        >
          <option value="gemini-3-pro-image-preview">nano-banana-pro</option>
          <option value="gemini-3.1-flash-image-preview">gemini-3.1-flash-image</option>
          <option value="gemini-2.5-flash-image">gemini-2.5-flash-image</option>
        </select>
        <select
          value={mode}
          onChange={(e) => setMode(e.target.value as "edit" | "text")}
          className="text-[10.5px] px-2 py-1.5 rounded-md bg-black/40 border border-white/15 text-[var(--text-strong)]"
        >
          <option value="edit">image-to-image</option>
          <option value="text">text-only</option>
        </select>
      </div>
      <button
        type="button"
        onClick={handleGenerate}
        disabled={generating || !editablePrompt.trim()}
        className="mt-2 w-full text-[12px] py-2 rounded-md bg-gradient-to-r from-rose-500 to-pink-500 text-white hover:opacity-95 disabled:opacity-40 inline-flex items-center justify-center gap-1.5 font-semibold"
      >
        {generating ? <Loader2 className="h-3.5 w-3.5 animate-spin" /> : <Sparkles className="h-3.5 w-3.5" />}
        {generating ? "生成中…(约 5-15 秒)" : `⚡ 生成 1 张${gens.length > 0 ? "(再来一张)" : ""}`}
      </button>

      {/* Grid of generated results */}
      {gens.length > 0 && (
        <div className="mt-2.5">
          <div className="text-[10px] text-[var(--text-faint)] uppercase tracking-widest mb-1.5">
            生成结果 ({gens.length})
          </div>
          <div className="grid grid-cols-3 gap-1.5">
            {gens.map((g) => (
              <button
                type="button"
                key={g.id}
                onClick={() => handleSelectGen(g.id, g.selected)}
                title={`${g.mode} · ${g.model}\n${g.prompt}\n${g.selected ? "已选用(点取消)" : "点击选用"}`}
                className={`relative aspect-square rounded-md overflow-hidden border-2 transition ${
                  g.selected
                    ? "border-emerald-400 ring-2 ring-emerald-400/40"
                    : "border-white/15 hover:border-white/40"
                }`}
              >
                <img
                  src={generatedImageUrl(job.id, frame.index, g.id)}
                  alt={`gen ${g.id}`}
                  className="absolute inset-0 w-full h-full object-cover"
                />
                {g.selected && (
                  <div className="absolute top-1 right-1 bg-emerald-500 text-white rounded-full p-0.5">
                    <Check className="h-2.5 w-2.5" />
                  </div>
                )}
              </button>
            ))}
          </div>
        </div>
      )}
    </KanbanCard>
  )
}
|
||||
|
||||
@@ -24,11 +24,22 @@ export interface FrameDescription {
|
||||
suggested_prompt?: string
|
||||
}
|
||||
|
||||
/** One generated image attached to a key frame, as delivered inside a Job. */
export interface GeneratedImage {
  /** Identifier of the generated image; used to build its URL and to select it. */
  id: string
  /** Prompt recorded for this image. */
  prompt: string
  /** Model name recorded for this image. */
  model: string
  /** Generation mode recorded for this image. */
  mode: string
  /** URL path of the image as reported by the backend. */
  url: string
  /** Whether this image is currently selected. */
  selected: boolean
  /** Creation timestamp as a number. */
  created_at: number
}
|
||||
|
||||
/** A key frame of a job, with optional recognition result and generated images. */
export interface KeyFrame {
  index: number
  timestamp: number
  url: string
  /** Recognition result; may be absent or null. */
  description?: FrameDescription | null
  /** Images generated for this frame; may be absent. */
  generated_images?: GeneratedImage[]
}
|
||||
|
||||
export interface TranscriptSegment {
|
||||
@@ -117,6 +128,42 @@ export async function describeFrame(jobId: string, frameIdx: number): Promise<Jo
|
||||
return res.json()
|
||||
}
|
||||
|
||||
/**
 * POST a generation request for one key frame and resolve with the parsed
 * JSON response (the updated job).
 *
 * Rejects with an Error containing the HTTP status and up to 300 characters
 * of the response body when the response is not ok.
 */
export async function generateImage(
  jobId: string,
  frameIdx: number,
  body: { prompt: string; extra_prompt?: string; model?: string; mode?: "edit" | "text" },
): Promise<Job> {
  const endpoint = `${API_BASE}/jobs/${jobId}/frames/${frameIdx}/generate`
  const res = await fetch(endpoint, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(body),
  })
  if (res.ok) {
    return res.json()
  }
  // Reading the body is best effort; fall back to an empty string.
  const detail = await res.text().catch(() => "")
  throw new Error(`generate ${res.status} ${detail.slice(0, 300)}`)
}
|
||||
|
||||
/**
 * POST the desired selection state for one generated image and resolve with
 * the parsed JSON response (the updated job).
 *
 * Rejects with an Error containing the HTTP status and up to 300 characters
 * of the response body, in the same shape as `generateImage`, so the
 * backend's error detail is not lost.
 */
export async function selectGenerated(
  jobId: string,
  frameIdx: number,
  genId: string,
  selected: boolean,
): Promise<Job> {
  const res = await fetch(`${API_BASE}/jobs/${jobId}/frames/${frameIdx}/gen/${genId}/select`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ selected }),
  })
  if (!res.ok) {
    // Reading the body is best effort; fall back to an empty string.
    const txt = await res.text().catch(() => "")
    throw new Error(`select ${res.status} ${txt.slice(0, 300)}`)
  }
  return res.json()
}
|
||||
|
||||
/** Build the URL of a generated image of one frame. */
export function generatedImageUrl(jobId: string, frameIdx: number, genId: string): string {
  const frameBase = `${API_BASE}/jobs/${jobId}/frames/${frameIdx}`
  return `${frameBase}/gen/${genId}.jpg`
}
|
||||
|
||||
/** Build the URL of a key frame image. */
export function frameUrl(jobId: string, frameIndex: number): string {
  const framesBase = `${API_BASE}/jobs/${jobId}/frames`
  return `${framesBase}/${frameIndex}.jpg`
}
|
||||
|
||||
Reference in New Issue
Block a user