fix(api): harden subprocess/SSRF/concurrency and add db pool

- run(): add timeout (download 600s via DOWNLOAD_TIMEOUT_SECONDS, else 300s);
  TimeoutExpired now kills the child and fails the job instead of hanging forever
- create_job: validate_source_url() rejects file://, private/loopback/link-local
  IPs and off-allowlist hosts (SOURCE_URL_ALLOWED_HOSTS) — closes SSRF/local-read
- per-job RLock guards save_state/update/update_generated_video and the retry
  check-and-set so concurrent video workers can't clobber state.json
- db: psycopg_pool connection pool (graceful fallback if unavailable); write
  failures surfaced via logging.error instead of silent print
- read-only media GET routes use job_path() (no mkdir) to stop empty-dir spam
- wrap remaining Image.open() in with-blocks to avoid fd leaks

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-30 02:04:59 +08:00
parent 56ea8aef11
commit 3ed3f721eb
3 changed files with 191 additions and 62 deletions

View File

@@ -1,6 +1,8 @@
from __future__ import annotations
import logging
import os
import threading
import time
import uuid
from datetime import datetime, timezone
@@ -15,18 +17,53 @@ except ModuleNotFoundError: # Local dev can still run without Postgres deps ins
dict_row = None
Jsonb = None
# psycopg_pool is optional: when it is not installed, ConnectionPool stays None
# so the rest of the module can detect that and avoid using a pool.
try:
    from psycopg_pool import ConnectionPool
except ModuleNotFoundError:
    ConnectionPool = None

logger = logging.getLogger("skg.db")

# Connection string is read once at import time, with surrounding whitespace removed.
DATABASE_URL = os.environ.get("DATABASE_URL", "").strip()
# The DB layer is active only with a non-empty URL and an importable psycopg.
DB_ENABLED = bool(DATABASE_URL) and psycopg is not None

# Slot for the lazily created process-wide pool, and the lock guarding its creation.
_POOL_LOCK = threading.Lock()
_POOL = None
def enabled() -> bool:
    """Report whether the database layer is switched on (the value of ``DB_ENABLED``)."""
    is_active = DB_ENABLED
    return is_active
def _pool_max_size(default: int = 10, minimum: int = 1) -> int:
    """Read ``DB_POOL_MAX_SIZE`` from the environment as a usable pool size.

    An unset or empty variable yields ``default``. A non-numeric value also
    yields ``default`` (with a warning) instead of raising, and a value below
    ``minimum`` is raised to ``minimum`` because the pool rejects a max size
    smaller than its min size.
    """
    raw = os.getenv("DB_POOL_MAX_SIZE", "").strip()
    if not raw:
        return default
    try:
        size = int(raw)
    except ValueError:
        logger.warning(
            "[db] invalid DB_POOL_MAX_SIZE=%r; falling back to %d", raw, default
        )
        return default
    if size < minimum:
        logger.warning(
            "[db] DB_POOL_MAX_SIZE=%d is below the minimum; using %d", size, minimum
        )
        return minimum
    return size


def _pool():
    """Lazily build a process-wide connection pool so concurrent workers/requests
    don't exhaust Postgres by opening a fresh connection per query.

    Returns:
        The shared, already-opened ``ConnectionPool`` instance.

    The pool is created at most once: the unlocked check is the fast path, and
    the second check under ``_POOL_LOCK`` stops two threads racing through the
    first check from each building a pool.
    """
    global _POOL
    if _POOL is not None:
        return _POOL
    with _POOL_LOCK:
        if _POOL is None:
            min_size = 1
            pool = ConnectionPool(
                DATABASE_URL,
                min_size=min_size,
                # A bad env value must not make every DB call fail with ValueError.
                max_size=_pool_max_size(default=10, minimum=min_size),
                timeout=10,
                kwargs={"row_factory": dict_row, "connect_timeout": 5},
                open=False,
            )
            pool.open()
            # Publish only after open() returned, so other threads never see a
            # pool that has not been opened.
            _POOL = pool
    return _POOL
def _connect():
    """Hand back a connection usable as ``with _connect() as conn:``.

    Raises:
        RuntimeError: if the database layer is disabled.
    """
    if not DB_ENABLED:
        raise RuntimeError("database disabled")
    if ConnectionPool is None:
        # No pool package available: open a dedicated connection for this call.
        return psycopg.connect(DATABASE_URL, row_factory=dict_row, connect_timeout=5)
    # pool.connection() is a context manager that gives the connection back to
    # the pool on exit, so existing `with _connect() as conn:` callers still work.
    shared_pool = _pool()
    return shared_pool.connection()
@@ -45,12 +82,14 @@ def _json(value: Any):
def _execute_safely(label: str, fn):
    """Run ``fn()`` as a best-effort database operation.

    Args:
        label: Short name of the operation, used in the failure log message.
        fn: Zero-argument callable that performs the database work.

    Returns:
        Whatever ``fn()`` returns, or ``None`` when the database is disabled
        or ``fn()`` raised.
    """
    # DB disabled is an expected, silent no-op; an actual failure while the DB is
    # enabled is a real problem (stale job index / dropped audit) and must be loud.
    if not DB_ENABLED:
        return None
    try:
        return fn()
    except Exception as exc:
        # Deliberately broad: callers treat DB writes as best-effort, so nothing
        # may propagate. Report once through the logger (not also via print) and
        # attach the traceback so the failure can be diagnosed.
        logger.error("[db] %s failed: %s", label, exc, exc_info=True)
        return None