auto-save 2026-05-14 04:59 (~3)

This commit is contained in:
2026-05-14 04:59:53 +08:00
parent f5ac97baf4
commit f1f3a0fbe5
3 changed files with 491 additions and 2 deletions

View File

@@ -3270,6 +3270,19 @@
"type": "session-heartbeat",
"message": "Claude 会话活跃 · 最近命令claude · 1 项未提交变更 · 最近提交auto-save 2026-05-14 04:48 (~1)",
"files_changed": 1
},
{
"ts": "2026-05-14T04:54:24+08:00",
"type": "commit",
"message": "auto-save 2026-05-14 04:54 (~1)",
"hash": "f5ac97b",
"files_changed": 1
},
{
"ts": "2026-05-13T20:58:50Z",
"type": "session-heartbeat",
"message": "Codex 会话活跃 · 最近命令codex · 2 项未提交变更 · 最近提交auto-save 2026-05-14 04:54 (~1)",
"files_changed": 2
}
]
}

View File

@@ -92,6 +92,11 @@ FrameExtractTarget = Literal["balanced", "subject", "transition", "expression",
FrameExtractMode = Literal["replace", "append"]
FrameExtractQuality = Literal["auto", "fast", "accurate", "ultra"]
AnalyzeTask = tuple[str, int, FrameExtractTarget, FrameExtractMode, FrameExtractQuality]
# Solid background color painted behind a generated asset.
AssetBackground = Literal["white", "black"]
# Requested output size: "source" keeps the source frame's pixel dimensions,
# numeric values are an edge length in pixels (see _asset_target_size).
AssetSize = Literal["source", "1024", "1536", "2048"]
# Quality tier recorded on an asset; "hd" is the only tier defined here.
AssetQuality = Literal["hd"]
# "living" covers people / animals / characters, "object" covers everything else.
SubjectKind = Literal["object", "living"]
# View identifiers for multi-view subject assets; which ones are generated for
# each SubjectKind is decided by _subject_view_labels.
SubjectView = Literal["front", "back", "left", "right", "side", "side_walk", "top", "bottom", "expression"]
FRAME_TARGET_LABELS: dict[FrameExtractTarget, str] = {
"balanced": "综合关键帧",
"subject": "清晰主体",
@@ -161,7 +166,7 @@ class StoryboardScene(BaseModel):
class StoryboardImage(BaseModel):
"""用户从各处"上推"到分镜头编排区的图片"""
ref_id: str # uuid hex 8
kind: Literal["keyframe", "cutout"] # keyframe = 关键帧本身 / cutout = 元素提取图
kind: Literal["keyframe", "cutout", "asset"] # asset = 场景 / 主体视角等组图素材
frame_idx: int
element_id: str | None = None # cutout 时
cutout_id: str | None = None # cutout 时versioned id老数据可能 == element_id
@@ -169,6 +174,42 @@ class StoryboardImage(BaseModel):
created_at: float = 0.0
class QualityReport(BaseModel):
    """Image quality summary, as produced by ``_image_quality_report``."""
    width: int = 0  # pixel width of the inspected image
    height: int = 0  # pixel height of the inspected image
    short_side: int = 0  # min(width, height)
    sharpness: float = 0.0  # score from _sharpness_from_gray, rounded to 2 decimals
    risk: Literal["ok", "warn", "bad"] = "ok"  # overall verdict for downstream use
    warnings: list[str] = Field(default_factory=list)  # human-readable messages shown to the user
class SceneAsset(BaseModel):
    """A generated scene reference image attached to a key frame."""
    id: str  # generate_scene_asset builds "scene_{idx:03d}_{8 hex chars}"
    label: str = ""  # display label
    url: str = ""  # "/jobs/{job_id}/assets/{asset_id}.jpg" (see _asset_url)
    width: int = 0  # pixel width of the normalized output canvas
    height: int = 0  # pixel height of the normalized output canvas
    quality: AssetQuality = "hd"  # quality tier copied from the request
    size: AssetSize = "source"  # size option copied from the request
    quality_report: QualityReport | None = None  # report computed on the generated file
    created_at: float = 0.0  # time.time() at creation
class SubjectAsset(BaseModel):
    """One generated view of a subject (element), part of a multi-view asset pack."""
    id: str  # generate_subject_assets builds "subject_{idx:03d}_{element_id}_{view}_{8 hex chars}"
    view: SubjectView  # which view this image depicts
    label: str = ""  # display label ("{element name} · {view label}")
    url: str = ""  # "/jobs/{job_id}/assets/{asset_id}.jpg" (see _asset_url)
    width: int = 0  # pixel width of the normalized output canvas
    height: int = 0  # pixel height of the normalized output canvas
    background: AssetBackground = "white"  # background color requested for the asset
    quality: AssetQuality = "hd"  # quality tier copied from the request
    size: AssetSize = "source"  # size option copied from the request
    source_frame_indices: list[int] = Field(default_factory=list)  # frames used as reference (at most 6)
    ai_completed: bool = True  # NOTE(review): never set explicitly in the visible code; presumably marks AI-completed content — confirm
    created_at: float = 0.0  # time.time() at creation
class KeyElement(BaseModel):
"""关键帧里识别 / 用户提取的元素 · 多次提取累积多张图,让用户挑选满意的"""
id: str # uuid hex 8
@@ -182,6 +223,8 @@ class KeyElement(BaseModel):
# 旧字段兼容v1 单图)· 渲染时 fallback 用,新提取不再写入
cutout_id: str | None = None
cutout_background: Literal["white", "black"] = "white"
subject_kind: SubjectKind = "object"
subject_assets: list[SubjectAsset] = Field(default_factory=list)
created_at: float = 0.0
@@ -192,6 +235,8 @@ class KeyFrame(BaseModel):
description: dict | None = None # vision 模型识别结果 {scene, objects, style, suggested_prompt}
cleaned_url: str | None = None # 清洗后干净版(待应用)→ /jobs/{id}/frames/{idx}/cleaned.jpg
cleaned_applied: bool = False # 是否已用清洗版替换原图(替换后 cleaned_url=null
quality_report: QualityReport | None = None
scene_assets: list[SceneAsset] = Field(default_factory=list)
elements: list[KeyElement] = [] # 提取的元素清单(持久化)
storyboard: StoryboardScene | None = None # 分镜头编排字段
generated_images: list[GeneratedImage] = []
@@ -483,6 +528,189 @@ def _scan_profile(duration: float, quality: FrameExtractQuality) -> tuple[float,
return scan_fps, scan_width, metric_width, estimated
def _image_quality_report(img_path: Path, region: dict | None = None) -> QualityReport:
    """Measure resolution and sharpness of an image and grade its suitability.

    Args:
        img_path: Image file to inspect.
        region: Optional normalized subject box with fractional ``w`` / ``h``
            keys (relative to the image size). When given, a warning is added
            if the box is smaller than 512px on its short side.

    Returns:
        A QualityReport. ``risk`` is "bad" when the image cannot be read, its
        short side is below 480px, or its sharpness is below 12; "warn" when
        any warning was raised; "ok" otherwise.
    """
    warnings: list[str] = []
    try:
        with Image.open(img_path) as raw:
            img = raw.convert("RGB")
        width, height = img.size
        # Sharpness is measured on a downscaled copy (max 512px wide) so the
        # score is comparable across resolutions and cheap to compute.
        metric_width = min(512, width)
        metric_height = max(1, round(metric_width * height / max(width, 1)))
        small = img.resize((metric_width, metric_height))
        gray = np.asarray(ImageOps.grayscale(small), dtype=np.float32)
        sharp = _sharpness_from_gray(gray)
    except Exception:
        return QualityReport(risk="bad", warnings=["无法读取图片质量信息"])

    short_side = min(width, height)
    if short_side < 720:
        warnings.append(f"短边 {short_side}px 低于 720px生视频可能偏糊")
    if sharp < 30:
        warnings.append("清晰度偏低,高清增强后仍可能有细节损失")
    if region:
        try:
            rw = int(float(region.get("w", 0)) * width)
            rh = int(float(region.get("h", 0)) * height)
            # A box without a usable size says nothing about the subject, so
            # do not report a meaningless "0×0px" warning for it.
            if rw > 0 and rh > 0 and min(rw, rh) < 512:
                warnings.append(f"主体框约 {rw}×{rh}px主体素材偏小")
        except Exception:
            # Best effort: a malformed region must not fail the whole report.
            pass

    # Derive the risk from the presence of warnings rather than from matching
    # substrings of the message text; the substring check silently ignored the
    # low-sharpness warning and left such images graded "ok".
    risk: Literal["ok", "warn", "bad"] = "warn" if warnings else "ok"
    if short_side < 480 or sharp < 12:
        risk = "bad"
    return QualityReport(
        width=width,
        height=height,
        short_side=short_side,
        sharpness=round(sharp, 2),
        risk=risk,
        warnings=warnings,
    )
def _asset_target_size(source_path: Path, size: AssetSize, square: bool = False) -> tuple[int, int]:
    """Resolve the (width, height) of the output canvas for an asset.

    "source" reuses the source image's dimensions (1024x1024 when the file
    cannot be read). A numeric size is the length of the longer edge, with the
    shorter edge following the source aspect ratio, or of both edges when
    ``square`` is set.
    """
    try:
        with Image.open(source_path) as opened:
            src_w, src_h = opened.size
    except Exception:
        # Unreadable source: fall back to a neutral square.
        src_w, src_h = 1024, 1024

    if size == "source":
        return max(1, src_w), max(1, src_h)

    long_edge = int(size)
    if square:
        return long_edge, long_edge

    if src_w >= src_h:
        short_edge = max(1, round(long_edge * src_h / max(src_w, 1)))
        return long_edge, short_edge
    short_edge = max(1, round(long_edge * src_w / max(src_h, 1)))
    return short_edge, long_edge
def _normalize_asset_image(
    img_bytes: bytes,
    out_path: Path,
    source_path: Path,
    size: AssetSize,
    background: AssetBackground = "white",
    square: bool = False,
) -> tuple[int, int]:
    """Fit generated image bytes onto a canvas of the requested size and save as JPEG.

    The image is scaled (up or down, aspect ratio preserved) to fit the canvas
    returned by ``_asset_target_size`` and centred on a solid background.

    Args:
        img_bytes: Encoded image returned by the image model.
        out_path: Destination JPEG path; parent directories are created.
        source_path: Source frame used to resolve the target size.
        size: Requested size option.
        background: Fill color for any area the image does not cover.
        square: Force a square canvas for numeric sizes.

    Returns:
        The (width, height) of the saved canvas.
    """
    import io as _io

    target_w, target_h = _asset_target_size(source_path, size, square=square)
    bg = (255, 255, 255) if background == "white" else (0, 0, 0)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with Image.open(_io.BytesIO(img_bytes)) as raw:
        img = raw.convert("RGB")

    # Image.thumbnail() only ever shrinks, so a model output smaller than the
    # target canvas used to be pasted as a small picture surrounded by padding.
    # Scale in both directions so the image always fills the canvas on at
    # least one axis.
    scale = min(target_w / max(img.width, 1), target_h / max(img.height, 1))
    fitted = (
        max(1, min(target_w, round(img.width * scale))),
        max(1, min(target_h, round(img.height * scale))),
    )
    if fitted != img.size:
        img = img.resize(fitted, Image.Resampling.LANCZOS)

    canvas = Image.new("RGB", (target_w, target_h), bg)
    canvas.paste(img, ((target_w - img.width) // 2, (target_h - img.height) // 2))
    canvas.save(out_path, "JPEG", quality=95)
    return target_w, target_h
def _asset_url(job_id: str, asset_id: str) -> str:
return f"/jobs/{job_id}/assets/{asset_id}.jpg"
def _find_frame(job: Job, idx: int) -> KeyFrame:
    """Return the job's key frame whose ``index`` equals ``idx``.

    Raises:
        HTTPException: 404 when the job has no such frame.
    """
    match = None
    for candidate in job.frames:
        if candidate.index == idx:
            match = candidate
            break
    if not match:
        raise HTTPException(404, "frame not found")
    return match
def _source_frame_path(job_id: str, idx: int) -> Path:
    """Pick the image file to use as source for frame ``idx``.

    The cleaned version under ``cleaned/`` wins when it exists on disk;
    otherwise the original under ``frames/`` is returned (without checking
    that it exists).
    """
    filename = "{:03d}.jpg".format(idx)
    cleaned = job_dir(job_id) / "cleaned" / filename
    return cleaned if cleaned.exists() else job_dir(job_id) / "frames" / filename
def _focus_source_for_element(job_id: str, idx: int, el: KeyElement) -> tuple[Path, Path | None]:
    """Build the image handed to the model for one element of a frame.

    When the element has a region, the frame is cropped to that region
    enlarged 1.6x around its centre (clamped to the frame) and written to a
    temporary JPEG. Without a region, or when cropping fails or the crop is
    8px or less on a side, the full frame is used.

    Returns:
        ``(model_src, tmp_focus)``: the path to feed to the model, and the
        temporary crop file the caller must delete afterwards (``None`` when
        the full frame is used).
    """
    import os as _os
    import tempfile as _tempfile

    src = _source_frame_path(job_id, idx)
    if not el.region:
        return src, None

    tmp_focus: Path | None = None
    try:
        # Close the file handle as soon as the pixels are loaded.
        with Image.open(src) as raw:
            im = raw.convert("RGB")
        W, H = im.size
        r = el.region
        # Clamp the normalized box into the unit square.
        x = max(0.0, min(1.0, float(r.get("x", 0))))
        y = max(0.0, min(1.0, float(r.get("y", 0))))
        w = max(0.0, min(1.0 - x, float(r.get("w", 0))))
        h = max(0.0, min(1.0 - y, float(r.get("h", 0))))
        # Expand around the centre so the model sees some surrounding context.
        cx, cy = x + w / 2, y + h / 2
        ew, eh = w * 1.6, h * 1.6
        x0 = max(0.0, cx - ew / 2)
        y0 = max(0.0, cy - eh / 2)
        x1 = min(1.0, cx + ew / 2)
        y1 = min(1.0, cy + eh / 2)
        left, top, right, bottom = int(x0 * W), int(y0 * H), int(x1 * W), int(y1 * H)
        if right - left > 8 and bottom - top > 8:
            # mkstemp + close: the path is written only after the descriptor
            # is released, which also works on platforms that forbid opening
            # a file twice.
            fd, tmp_name = _tempfile.mkstemp(suffix=".jpg")
            _os.close(fd)
            tmp_focus = Path(tmp_name)
            im.crop((left, top, right, bottom)).save(tmp_focus, format="JPEG", quality=92)
            return tmp_focus, tmp_focus
    except Exception as e:
        print(f"[focus source crop failed, fallback to full frame] {e}", flush=True)
        # Do not leave a half-written temp file behind when saving failed.
        if tmp_focus is not None:
            try:
                tmp_focus.unlink()
            except OSError:
                pass
    return src, None
def _make_reference_contact_sheet(job_id: str, frame_indices: list[int], out_path: Path) -> Path | None:
    """Tile several source frames into one contact-sheet JPEG.

    Uses the first 6 distinct frame indices whose source file exists. Each
    frame is fitted into a 420x420 light-grey cell; cells are laid out in 2
    columns (exactly two frames) or 3 columns (more).

    Returns:
        ``out_path`` after writing the sheet, or ``None`` when fewer than two
        frames could be loaded (a sheet of one adds nothing).
    """
    cell = 420
    max_refs = 6
    cell_bg = (245, 245, 245)

    paths: list[Path] = []
    seen: set[int] = set()
    for idx in frame_indices:
        if idx in seen:
            continue
        seen.add(idx)
        p = _source_frame_path(job_id, idx)
        if p.exists():
            paths.append(p)
        if len(paths) >= max_refs:
            break
    if len(paths) <= 1:
        return None

    thumbs: list[Image.Image] = []
    for p in paths:
        try:
            # Close each file handle once its pixels are loaded.
            with Image.open(p) as raw:
                im = raw.convert("RGB")
            im.thumbnail((cell, cell), Image.Resampling.LANCZOS)
            canvas = Image.new("RGB", (cell, cell), cell_bg)
            canvas.paste(im, ((cell - im.width) // 2, (cell - im.height) // 2))
            thumbs.append(canvas)
        except Exception:
            # Best effort: an unreadable frame is left out of the sheet.
            continue
    if len(thumbs) <= 1:
        return None

    cols = 3 if len(thumbs) > 2 else 2
    rows = (len(thumbs) + cols - 1) // cols
    sheet = Image.new("RGB", (cols * cell, rows * cell), cell_bg)
    for i, thumb in enumerate(thumbs):
        sheet.paste(thumb, ((i % cols) * cell, (i // cols) * cell))
    out_path.parent.mkdir(parents=True, exist_ok=True)
    sheet.save(out_path, "JPEG", quality=92)
    return out_path
def _subject_view_labels(kind: SubjectKind) -> list[tuple[SubjectView, str]]:
if kind == "living":
return [
("front", "正面站立"),
("back", "背面站立"),
("side", "侧面站立"),
("side_walk", "侧面走路"),
("top", "顶部视角"),
("bottom", "底部视角"),
("expression", "表情参考"),
]
return [
("front", "正面"),
("back", "背面"),
("left", "左侧"),
("right", "右侧"),
("top", "顶部"),
("bottom", "底部"),
]
def _attach_temporal_metrics(items: list[dict]) -> None:
"""相邻低清帧差异:转场 / 动作目标依赖它,不需要逐帧高分辨率扫描。"""
for i, it in enumerate(items):
@@ -1672,6 +1900,19 @@ class UpdateElementReq(BaseModel):
position: str | None = None
class GenerateSceneAssetReq(BaseModel):
    """Request body for generating a scene reference image from a key frame."""
    quality: AssetQuality = "hd"  # recorded on the resulting SceneAsset
    size: AssetSize = "source"  # output canvas size; "source" keeps the frame's dimensions
class GenerateSubjectAssetsReq(BaseModel):
    """Request body for generating a multi-view asset pack for one element."""
    subject_kind: SubjectKind = "object"  # selects which set of views is generated
    background: AssetBackground = "white"  # solid background color of every generated view
    quality: AssetQuality = "hd"  # recorded on each resulting SubjectAsset
    size: AssetSize = "source"  # output canvas size; "source" keeps the frame's dimensions
    # Extra key frames showing the same subject; None means only the frame in
    # the URL. The endpoint always includes its own frame and uses at most 6.
    source_frame_indices: list[int] | None = None
@app.post("/jobs/{job_id}/frames/{idx}/elements", response_model=Job)
def add_element(job_id: str, idx: int, req: AddElementReq) -> Job:
"""加一条元素 · 若 name_en 缺则自动 zh→en 翻译"""
@@ -1787,6 +2028,57 @@ def delete_element(job_id: str, idx: int, element_id: str) -> Job:
return job
@app.post("/jobs/{job_id}/frames/{idx}/scene-asset", response_model=Job)
def generate_scene_asset(job_id: str, idx: int, req: GenerateSceneAssetReq) -> Job:
    """Generate one clean, high-definition scene reference image for a key frame.

    A frame normally needs a single scene image; re-running appends a new one
    and keeps the earlier results so they can be compared manually.

    Raises:
        HTTPException: 404 when the job, the frame or its source file is
            missing; 500 when the image model call fails.
    """
    import time as _time

    job = JOBS.get(job_id)
    if not job:
        raise HTTPException(404, "job not found")
    _find_frame(job, idx)  # validation only: raises 404 for an unknown frame
    src = _source_frame_path(job_id, idx)
    if not src.exists():
        raise HTTPException(404, "source frame file missing")

    prompt = " ".join((
        "Create one clean high-definition scene reference image from this frame.",
        "Remove watermarks, platform UI, captions, usernames, hashtags, logos, and overlay graphics.",
        "Preserve the original camera angle, composition, environment, lighting style, and believable spatial layout.",
        "Do not create multiple views. Do not isolate objects. Keep it useful as the scene/background reference for image-to-video generation.",
        "Enhance clarity and texture while avoiding over-smoothing or changing important visual details.",
    ))
    models = [IMAGE_MODEL, "gemini-3.1-flash-image-preview", "gemini-2.5-flash-image"]
    try:
        img_bytes, _mode = _image_edit_call(
            src,
            prompt,
            models=models,
            fallback_text=False,
            max_attempts=3,
            max_side=1280,
        )
    except RuntimeError as e:
        raise HTTPException(500, f"scene asset failed: {e}")

    asset_id = f"scene_{idx:03d}_{uuid.uuid4().hex[:8]}"
    out_path = job_dir(job_id) / "assets" / f"{asset_id}.jpg"
    width, height = _normalize_asset_image(img_bytes, out_path, src, req.size, "white", square=False)
    scene = SceneAsset(
        id=asset_id,
        label=f"分镜 {idx + 1} 场景图",
        url=_asset_url(job_id, asset_id),
        width=width,
        height=height,
        quality=req.quality,
        size=req.size,
        quality_report=_image_quality_report(out_path),
        created_at=_time.time(),
    )

    # Refresh the frame's own quality report and append the new asset to its history.
    for kf in job.frames:
        if kf.index == idx:
            kf.quality_report = _image_quality_report(src)
            kf.scene_assets = (kf.scene_assets or []) + [scene]
    update(job, frames=list(job.frames), message=f"场景图生成完成 · 分镜 {idx + 1}")
    return job
@app.post("/jobs/{job_id}/frames/{idx}/elements/{element_id}/cutout", response_model=Job)
def cutout_element(job_id: str, idx: int, element_id: str) -> Job:
"""AI 提取元素 · 每次累积一张新图:
@@ -1881,6 +2173,96 @@ def cutout_element(job_id: str, idx: int, element_id: str) -> Job:
return job
@app.post("/jobs/{job_id}/frames/{idx}/elements/{element_id}/subject-assets", response_model=Job)
def generate_subject_assets(job_id: str, idx: int, element_id: str, req: GenerateSubjectAssetsReq) -> Job:
    """Generate a multi-view asset pack for one subject (element).

    When ``source_frame_indices`` is given, the selected key frames are tiled
    into one reference sheet, telling the model that all of them show the same
    subject. Otherwise the element's focus crop (or the full frame) is used.

    The generated views are appended to the element's ``subject_assets``. The
    pack is all-or-nothing: if any view fails, files already written for this
    request are removed and nothing is recorded on the job.

    Raises:
        HTTPException: 404 when the job, frame or element is missing; 500
            when the image model fails for any view.
    """
    import time as _time

    job = JOBS.get(job_id)
    if not job:
        raise HTTPException(404, "job not found")
    frame = _find_frame(job, idx)
    el = next((e for e in frame.elements if e.id == element_id), None)
    if not el:
        raise HTTPException(404, "element not found")

    # The endpoint's own frame always leads the reference list; at most 6
    # distinct frames are used.
    source_indices = [int(x) for x in (req.source_frame_indices or [idx]) if isinstance(x, int) or str(x).isdigit()]
    if idx not in source_indices:
        source_indices = [idx] + source_indices
    source_indices = list(dict.fromkeys(source_indices))[:6]

    target = (el.name_en or el.name_zh).strip()
    bg_phrase = "pure white" if req.background == "white" else "pure black"
    kind_phrase = "person, animal, or living character" if req.subject_kind == "living" else "object or product-like subject"
    models = [IMAGE_MODEL, "gemini-3.1-flash-image-preview", "gemini-2.5-flash-image"]
    frame_src = _source_frame_path(job_id, idx)  # loop-invariant: sizes every view's canvas

    generated: list[SubjectAsset] = []
    written: list[Path] = []  # asset files created so far, removed again on failure
    tmp_focus: Path | None = None
    sheet_tmp: Path | None = None
    try:
        # Temp resources are created inside the try so the finally clause
        # cleans them up even when building the reference sheet fails.
        model_src, tmp_focus = _focus_source_for_element(job_id, idx, el)
        if len(source_indices) > 1:
            sheet_tmp = job_dir(job_id) / "tmp" / f"subject_refs_{idx:03d}_{element_id}_{uuid.uuid4().hex[:6]}.jpg"
            sheet = _make_reference_contact_sheet(job_id, source_indices, sheet_tmp)
            if sheet:
                model_src = sheet

        for view, view_label in _subject_view_labels(req.subject_kind):
            if view == "side_walk":
                view_prompt = "side view in a natural walking pose, same identity and proportions"
            elif view == "expression":
                view_prompt = "clear expression reference, frontal or three-quarter standing pose, preserving the same identity"
            else:
                view_prompt = f"{view_label} view"
            prompt = (
                f"Use the reference image(s) to generate a single {view_prompt} of the same {target}. "
                f"The subject is a {kind_phrase}. If multiple frames are shown, treat them as evidence of one same subject, not multiple subjects. "
                "Preserve identity, proportions, silhouette, material, colors, styling, and distinctive details across all generated views. "
                f"Create a high-definition standalone asset on a {bg_phrase} background. "
                "No extra objects, no original scene fragments, no text, no watermark, no UI. "
                "If the source is incomplete or occluded, intelligently complete missing parts while staying consistent with the reference. "
                "For living subjects, keep the body standing and readable; do not create medical, horror, or distorted anatomy."
            )
            try:
                img_bytes, _mode = _image_edit_call(model_src, prompt, models=models, fallback_text=False, max_attempts=3, max_side=1280)
            except RuntimeError as e:
                raise HTTPException(500, f"subject asset {view} failed: {e}")
            asset_id = f"subject_{idx:03d}_{element_id}_{view}_{uuid.uuid4().hex[:8]}"
            out_path = job_dir(job_id) / "assets" / f"{asset_id}.jpg"
            written.append(out_path)
            width, height = _normalize_asset_image(img_bytes, out_path, frame_src, req.size, req.background, square=False)
            generated.append(SubjectAsset(
                id=asset_id,
                view=view,
                label=f"{el.name_zh} · {view_label}",
                url=_asset_url(job_id, asset_id),
                width=width,
                height=height,
                background=req.background,
                quality=req.quality,
                size=req.size,
                source_frame_indices=source_indices,
                created_at=_time.time(),
            ))
    except Exception:
        # A failed pack is never recorded on the job, so the files already
        # written would be unreachable garbage: remove them.
        for p in written:
            try:
                p.unlink()
            except OSError:
                pass
        raise
    finally:
        for p in (tmp_focus, sheet_tmp):
            if p and p.exists():
                try:
                    p.unlink()
                except OSError:
                    pass

    new_frames = []
    for f in job.frames:
        if f.index == idx:
            f.quality_report = _image_quality_report(frame_src, el.region)
            for e in f.elements:
                if e.id == element_id:
                    e.subject_kind = req.subject_kind
                    e.cutout_background = req.background
                    e.subject_assets = (e.subject_assets or []) + generated
        new_frames.append(f)
    update(job, frames=new_frames, message=f"主体资产包生成完成 · {el.name_zh} · {len(generated)}")
    return job
@app.delete("/jobs/{job_id}/frames/{idx}/elements/{element_id}/cutouts/{cutout_id}", response_model=Job)
def delete_cutout(job_id: str, idx: int, element_id: str, cutout_id: str) -> Job:
"""删除该元素的某张提取图"""

View File

@@ -44,6 +44,8 @@ export interface KeyElement {
cutouts?: string[] // v2 多张提取图 id 列表
cutout_id?: string | null // v1 兼容字段
cutout_background?: "white" | "black"
subject_kind?: SubjectKind
subject_assets?: SubjectAsset[]
created_at?: number
}
@@ -123,6 +125,8 @@ export interface KeyFrame {
description?: FrameDescription | null
cleaned_url?: string | null
cleaned_applied?: boolean
quality_report?: QualityReport | null
scene_assets?: SceneAsset[]
elements?: KeyElement[]
storyboard?: StoryboardScene | null
generated_images?: GeneratedImage[]
@@ -131,6 +135,46 @@ export interface KeyFrame {
export type FrameExtractTarget = "balanced" | "subject" | "transition" | "expression" | "motion"
export type FrameExtractMode = "replace" | "append"
export type FrameExtractQuality = "auto" | "fast" | "accurate" | "ultra"
/** Solid background color behind a generated asset. */
export type AssetBackground = "white" | "black"
/** Output size option: "source" keeps the source frame's dimensions, numeric values are an edge length in pixels. */
export type AssetSize = "source" | "1024" | "1536" | "2048"
/** Kind of subject: "living" for people / animals / characters, "object" otherwise. */
export type SubjectKind = "object" | "living"
/** Identifier of one view in a multi-view subject asset pack. */
export type SubjectView = "front" | "back" | "left" | "right" | "side" | "side_walk" | "top" | "bottom" | "expression"
/** Image quality summary computed by the backend. */
export interface QualityReport {
  /** Pixel width of the inspected image. */
  width: number
  /** Pixel height of the inspected image. */
  height: number
  /** The smaller of width and height. */
  short_side: number
  /** Sharpness score, rounded to two decimals by the backend. */
  sharpness: number
  /** Overall verdict. */
  risk: "ok" | "warn" | "bad"
  /** Human-readable warning messages. */
  warnings: string[]
}
/** A generated scene reference image attached to a key frame. */
export interface SceneAsset {
  /** Asset id; also the file stem used by `jobAssetUrl`. */
  id: string
  /** Display label. */
  label: string
  /** Server-relative path of the image (`/jobs/{jobId}/assets/{id}.jpg`). */
  url: string
  /** Pixel width of the output canvas. */
  width: number
  /** Pixel height of the output canvas. */
  height: number
  /** Quality tier; "hd" is the only one defined. */
  quality: "hd"
  /** Size option the asset was requested with. */
  size: AssetSize
  /** Quality report computed on the generated image, if any. */
  quality_report?: QualityReport | null
  /** Creation time as a Unix timestamp in seconds. */
  created_at: number
}
/** One generated view of a subject, part of a multi-view asset pack. */
export interface SubjectAsset {
  /** Asset id; also the file stem used by `jobAssetUrl`. */
  id: string
  /** Which view this image depicts. */
  view: SubjectView
  /** Display label. */
  label: string
  /** Server-relative path of the image (`/jobs/{jobId}/assets/{id}.jpg`). */
  url: string
  /** Pixel width of the output canvas. */
  width: number
  /** Pixel height of the output canvas. */
  height: number
  /** Background color the asset was generated on. */
  background: AssetBackground
  /** Quality tier; "hd" is the only one defined. */
  quality: "hd"
  /** Size option the asset was requested with. */
  size: AssetSize
  /** Key frames used as reference for this asset (at most 6 on the backend). */
  source_frame_indices?: number[]
  /** Backend default is true. NOTE(review): presumably marks AI-completed content — confirm. */
  ai_completed?: boolean
  /** Creation time as a Unix timestamp in seconds. */
  created_at: number
}
export interface TranscriptSegment {
index: number
@@ -142,7 +186,7 @@ export interface TranscriptSegment {
export interface StoryboardImage {
ref_id: string
kind: "keyframe" | "cutout"
kind: "keyframe" | "cutout" | "asset"
frame_idx: number
element_id?: string | null
cutout_id?: string | null
@@ -373,6 +417,10 @@ export function cutoutUrl(jobId: string, frameIndex: number, elementId: string,
return `${API_BASE}/jobs/${jobId}/frames/${frameIndex}/elements/${elementId}/cutout.jpg`
}
/** Absolute URL of a generated asset image (scene or subject view) of a job. */
export function jobAssetUrl(jobId: string, assetId: string): string {
  const path = ["jobs", jobId, "assets", `${assetId}.jpg`].join("/")
  return `${API_BASE}/${path}`
}
// 兼容 v1 (cutout_id) / v2 (cutouts 数组) — 返回"有没有提取图"
export function hasCutout(e: KeyElement): boolean {
return (Array.isArray(e.cutouts) && e.cutouts.length > 0) || !!e.cutout_id
@@ -601,3 +649,49 @@ export async function cutoutElement(jobId: string, frameIdx: number, elementId:
}
return res.json()
}
/**
 * Ask the backend to generate a clean HD scene reference image for a key frame.
 *
 * @param jobId - Job the frame belongs to.
 * @param frameIdx - Index of the key frame.
 * @param body - Optional options; `size` defaults to "source".
 * @returns The updated job as returned by the backend.
 * @throws Error with the HTTP status and up to 300 characters of the response body when the request fails.
 */
export async function generateSceneAsset(
  jobId: string,
  frameIdx: number,
  body: { size?: AssetSize } = {},
): Promise<Job> {
  const endpoint = `${API_BASE}/jobs/${jobId}/frames/${frameIdx}/scene-asset`
  const payload = { quality: "hd", size: body.size ?? "source" }
  const res = await fetch(endpoint, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  })
  if (res.ok) {
    return res.json()
  }
  // The error body is best effort: an unreadable body becomes an empty string.
  const detail = await res.text().catch(() => "")
  throw new Error(`sceneAsset ${res.status} ${detail.slice(0, 300)}`)
}
/**
 * Ask the backend to generate a multi-view asset pack for one element.
 *
 * @param jobId - Job the frame belongs to.
 * @param frameIdx - Index of the key frame that owns the element.
 * @param elementId - Element (subject) to generate views for.
 * @param body - Optional options; defaults are "object", "white", "source" and no extra reference frames.
 * @returns The updated job as returned by the backend.
 * @throws Error with the HTTP status and up to 300 characters of the response body when the request fails.
 */
export async function generateSubjectAssets(
  jobId: string,
  frameIdx: number,
  elementId: string,
  body: {
    subject_kind?: SubjectKind
    background?: AssetBackground
    size?: AssetSize
    source_frame_indices?: number[]
  } = {},
): Promise<Job> {
  const endpoint = `${API_BASE}/jobs/${jobId}/frames/${frameIdx}/elements/${elementId}/subject-assets`
  const payload = {
    quality: "hd",
    subject_kind: body.subject_kind ?? "object",
    background: body.background ?? "white",
    size: body.size ?? "source",
    source_frame_indices: body.source_frame_indices ?? null,
  }
  const res = await fetch(endpoint, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  })
  if (res.ok) {
    return res.json()
  }
  // The error body is best effort: an unreadable body becomes an empty string.
  const detail = await res.text().catch(() => "")
  throw new Error(`subjectAssets ${res.status} ${detail.slice(0, 300)}`)
}