feat: improve subject generation workflow

This commit is contained in:
2026-05-18 17:44:52 +08:00
parent 78bd294d57
commit 1f600ae436
12 changed files with 682 additions and 372 deletions

View File

@@ -22,7 +22,9 @@ LLM_API_KEY=
ASR_MODEL=whisper-1
ASR_FALLBACK_MODEL=gemini-2.5-flash
TRANSLATE_MODEL=gemini-2.5-flash
REWRITE_MODEL=gemini-2.5-pro
GPT_TEXT_MODEL=gpt-4o
REWRITE_MODEL=gpt-4o
VISION_MODEL=gpt-4o
PRODUCT_VIEW_MODEL=gpt-image-2
IMAGE_BASE_URL=https://ai.skg.com/ezlink/v1
IMAGE_API_KEY=
@@ -33,9 +35,14 @@ SUBJECT_ASSET_IMAGE_MODELS=gpt-image-2
# Optional outbound proxy for AI gateway calls. Leave blank when the VPS has normal direct networking.
AI_HTTP_PROXY=
# Optional TikTok download login state for yt-dlp. Keep cookie files private.
YTDLP_COOKIES_FILE=
YTDLP_COOKIES_FROM_BROWSER=
# Audio rewrite and Azure OpenAI TTS
AUDIO_REWRITE_MODEL=gemini-2.5-pro
AUDIO_PRODUCT_BRIEF="SKG smart massage products for daily neck, shoulder, back, eye, knee, and foot relaxation. Keep claims premium, clean, credible, and non-medical."
# Voice is fixed to Azure OpenAI in the backend.
VOICE_PROVIDER=azure_openai
AZURE_OPENAI_BASE_URL=https://ai.skg.com/azure
AZURE_OPENAI_API_KEY=
@@ -43,13 +50,7 @@ AZURE_TTS_MODEL=gpt-4o-mini-tts
AZURE_TTS_VOICE_ID=alloy
AZURE_TTS_VOICE_POOL=alloy,verse,shimmer
AZURE_TTS_PATH=/audio/speech
# Legacy MiniMax TTS fallback; not the default voice provider.
MINIMAX_API_KEY=
MINIMAX_TTS_BASE_URL=https://api.minimax.io
MINIMAX_TTS_MODEL=speech-2.8-turbo
MINIMAX_TTS_VOICE_ID=English_expressive_narrator
MINIMAX_TTS_VOICE_POOL=English_magnetic_voiced_man,English_Upbeat_Woman,English_MaturePartner
AZURE_TTS_PATHS=/audio/speech,/v1/audio/speech
# Video generation. Use the SKG Doubao / Seedance gateway in production.
POE_API_BASE_URL=https://api.poe.com/v1