auto-save 2026-05-18 15:29 (+1, ~5)

2026-05-18 15:29:47 +08:00
parent 408c5fca47
commit 1c451c6ab3
6 changed files with 616 additions and 6 deletions
--- a/api/main.py
+++ b/api/main.py
@@ -25,10 +25,19 @@ from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import FileResponse
 from pydantic import BaseModel, Field

+from database import create_database, default_database_url, default_workflow_mode, infer_source_kind, storage_prefix
+
 load_dotenv()

 JOBS_DIR = Path(os.getenv("JOBS_DIR", "./jobs")).resolve()
 JOBS_DIR.mkdir(parents=True, exist_ok=True)
+DATABASE_URL = default_database_url(JOBS_DIR)
+# The database is optional: if it cannot be created, DB stays None and DB_INIT_ERROR keeps the reason.
+DB, DB_INIT_ERROR = None, ""
+try:
+    DB = create_database(DATABASE_URL, JOBS_DIR)
+except Exception as exc:
+    DB_INIT_ERROR = str(exc)
 CORS_ORIGINS = [o.strip() for o in os.getenv("CORS_ORIGINS", "http://localhost:4290,http://127.0.0.1:4290").split(",") if o.strip()]
 PRODUCT_LIBRARY_DIR = Path(
    os.getenv("PRODUCT_LIBRARY_DIR", Path(__file__).resolve().parent / "product_library" / "skg-products")
@@ -542,6 +551,10 @@ class AudioScript(BaseModel):
 class Job(BaseModel):
    id: str
    url: str
+    document_id: str = ""
+    source_kind: Literal["tiktok_link", "upload", "unknown"] = "unknown"
+    workflow_mode: Literal["feed_recreation", "uploaded_reference"] = "feed_recreation"
+    storage_prefix: str = ""
    status: JobStatus = "created"
    progress: int = 0
    message: str = ""
@@ -641,8 +654,26 @@ def job_with_artifacts(job: Job) -> Job:
    return job.model_copy(update=updates)


+def ensure_job_document_fields(job: Job) -> Job:
+    """Fill in source_kind, workflow_mode, document_id and storage_prefix on ``job`` in place; return the same job."""
+    kind = job.source_kind if job.source_kind != "unknown" else infer_source_kind(job.url)
+    mode = job.workflow_mode or default_workflow_mode(kind)  # NOTE(review): Job.workflow_mode defaults to "feed_recreation", so this fallback looks unreachable — confirm
+    job.source_kind = kind if kind in {"tiktok_link", "upload"} else "unknown"
+    job.workflow_mode = mode if mode in {"feed_recreation", "uploaded_reference"} else "feed_recreation"
+    job.document_id = job.document_id or job.id
+    job.storage_prefix = job.storage_prefix or storage_prefix(job.document_id, job.source_kind, job.workflow_mode)
+    return job
+
+
 def save_state(job: Job) -> None:
-    (job_dir(job.id) / "state.json").write_text(job.model_dump_json(indent=2))
+    """Write the job to state.json in its job directory, then mirror it to the database on a best-effort basis."""
+    state_dir = job_dir(ensure_job_document_fields(job).id)
+    (state_dir / "state.json").write_text(job.model_dump_json(indent=2))
+    if DB:
+        try:
+            DB.sync_job(job.model_dump(mode="json"), state_dir)
+        except Exception as exc:  # a failed sync is only logged: state.json has already been written
+            print(f"[database sync failed] job={job.id} error={exc}", flush=True)


 def update(job: Job, **kw) -> None:
@@ -3024,6 +3055,7 @@ def health() -> dict:
        "base_url": LLM_BASE_URL or "openai-default",
        "image_base_url": IMAGE_BASE_URL or LLM_BASE_URL or "openai-default",
        "voice_base_url": AZURE_OPENAI_BASE_URL,
+        "database": DB.health() if DB else {"enabled": False, "url": DATABASE_URL, "error": DB_INIT_ERROR},
        "models": {
            "asr": ASR_MODEL,
            "local_asr": LOCAL_ASR_MODEL,
@@ -3059,6 +3091,9 @@ def health() -> dict:

 class JobSummary(BaseModel):
    id: str
+    document_id: str = ""
+    source_kind: str = "unknown"
+    workflow_mode: str = "feed_recreation"
    url: str
    status: JobStatus
    progress: int = 0
@@ -3074,6 +3109,29 @@ class JobSummary(BaseModel):
    mtime: float = 0.0


+class DocumentSummary(BaseModel):  # item of the GET /documents response; built from one DB.list_documents() row
+    id: str  # required
+    title: str  # required
+    source_kind: str  # required; plain str here, not validated against a fixed set of values
+    workflow_mode: str  # required; plain str here, not validated against a fixed set of values
+    source_url: str = ""
+    primary_job_id: str = ""  # presumably the id of the job that created the document — TODO confirm
+    status: str = "created"
+    storage_prefix: str = ""
+    job_count: int = 0
+    asset_count: int = 0
+    created_at: float = 0.0  # presumably a Unix timestamp in seconds — TODO confirm against the database layer
+    updated_at: float = 0.0  # presumably a Unix timestamp in seconds — TODO confirm against the database layer
+
+
+@app.get("/documents", response_model=list[DocumentSummary])
+def list_documents(limit: int | None = None) -> list[DocumentSummary]:
+    """Return document summaries read from the database; an empty list when no database is available."""
+    if not DB:
+        return []
+    return [DocumentSummary(**record) for record in DB.list_documents(limit)]
+
+
@app.get("/jobs", response_model=list[JobSummary])
 def list_jobs(limit: int | None = None) -> list[JobSummary]:
    """所有 job 的精简列表，按磁盘 state.json mtime 倒序（最新优先）。前端无 ?job= 时用它回填历史。"""
@@ -3082,8 +3140,12 @@ def list_jobs(limit: int | None = None) -> list[JobSummary]:
        state_path = JOBS_DIR / job_id / "state.json"
        mtime = state_path.stat().st_mtime if state_path.exists() else 0.0
        thumb = f"/jobs/{job_id}/frames/{job.frames[0].index}.jpg" if job.frames else ""
+        ensure_job_document_fields(job)
        items.append(JobSummary(
            id=job.id,
+            document_id=job.document_id,
+            source_kind=job.source_kind,
+            workflow_mode=job.workflow_mode,
            url=job.url,
            status=job.status,
            progress=job.progress,
@@ -3109,7 +3171,7 @@ async def create_job(req: CreateJobReq, bg: BackgroundTasks) -> Job:
    if not req.url.strip():
        raise HTTPException(400, "url required")
    job_id = uuid.uuid4().hex[:12]
-    job = Job(id=job_id, url=req.url.strip())
+    job = Job(id=job_id, url=req.url.strip(), document_id=job_id, source_kind="tiktok_link", workflow_mode="feed_recreation")
    JOBS[job_id] = job
    save_state(job)
    bg.add_task(pipeline_download, job_id)
@@ -3133,7 +3195,7 @@ async def create_job_from_upload(bg: BackgroundTasks, file: UploadFile = File(..
    if not mp4.exists() or mp4.stat().st_size == 0:
        raise HTTPException(500, "upload failed")

-    job = Job(id=job_id, url=f"upload://{file.filename}")
+    job = Job(id=job_id, url=f"upload://{file.filename}", document_id=job_id, source_kind="upload", workflow_mode="uploaded_reference")
    JOBS[job_id] = job
    save_state(job)
    bg.add_task(pipeline_download, job_id)
@@ -3223,6 +3285,11 @@ def delete_job(job_id: str) -> dict[str, bool | str]:
    job = JOBS.pop(job_id, None)
    if not job and not d.exists():
        raise HTTPException(404, "job not found")
+    if DB:
+        try:
+            DB.delete_job(job_id)
+        except Exception as e:
+            print(f"[database delete failed] job={job_id} error={e}", flush=True)
    if d.exists():
        shutil.rmtree(d)
    return {"ok": True, "id": job_id}