init repo
This commit is contained in:
393
app/graph/nodes.py
Normal file
393
app/graph/nodes.py
Normal file
@@ -0,0 +1,393 @@
|
||||
"""Graph nodes — each node is an async callable: ReportState → ReportState.

Node layout (v2 — domain-aware, bilingual):

    START
      │
      ▼
    [decompose]          — Lead Agent splits the requirement into parallel research tracks, each tagged with domain + language
      │
      ▼
    [parallel_research]  — N sub-agents run in parallel, each using the model best suited to its domain
      │    global tracks → Claude/GPT (English)
      │    china tracks  → DeepSeek/Qwen (Chinese)
      ▼
    [write]              — Writer merges the tracks → produces the primary-language version
      │
      ▼
    [translate]          — High-quality translation → produces the other-language version
      │
      ▼
    [data]               — Data Agent generates charts/tables
      │
      ▼
    [review]             — Reviewer checks the report (bilingual)
      │    ├─ pass   → [format]
      │    └─ revise → [write]
      ▼
    [format]             — Writes the bilingual output files
      │
      ▼
    END
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from app.agents.base import BaseAgent
|
||||
from app.agents.researcher import ResearcherAgent
|
||||
from app.agents.writer import WriterAgent
|
||||
from app.agents.data_agent import DataAgent
|
||||
from app.agents.reviewer import ReviewerAgent
|
||||
from app.agents.formatter import FormatterAgent
|
||||
from app.config import settings
|
||||
|
||||
from .state import ReportState, SubtaskResult, NodeStatus, ContentDomain
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Node: decompose — Lead Agent decomposes into domain-tagged parallel tracks
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class DecomposeNode:
    """Lead-agent node that plans the report as domain-tagged research tracks.

    The node sends the client requirement to the lead model and stores the
    parsed JSON reply on ``state.decomposition``.
    """

    # Static system prompt: describes the planner role and the JSON reply shape.
    _SYSTEM_PROMPT = """\
You are a senior consulting partner planning a global industry report.

Your job is to decompose the client's requirement into 2-6 parallel research tracks.

CRITICAL: Each track must be tagged with a content domain and native language:

- domain: "global" → international markets, global competition, technology trends, overseas benchmarks
→ native_language: "en" (English sources are 10-100x richer for global analysis)

- domain: "china" → Chinese domestic market, government policy, local competitors, China-specific data
→ native_language: "zh" (Chinese sources are authoritative for domestic analysis)

The PRINCIPLE: whichever language has the richest professional literature for that topic
should be the native language. The other language version will be translated later.

Output (JSON):
{
"title_en": "English report title",
"title_zh": "中文报告标题",
"report_type": "report type",
"tracks": [
{
"title": "track title (in native language)",
"domain": "global|china",
"native_language": "en|zh",
"focus": "research focus description",
"prompt": "detailed research instructions (MUST be in the native_language)",
"data_needs": ["required data/charts"]
}
],
"synthesis_guide": "How to merge all tracks into a coherent report (bilingual structure notes)",
"methodology": "Analysis methodology"
}"""

    def __init__(self):
        lead = BaseAgent()
        lead.name = "lead"
        lead.model = settings.model_for_domain("reasoning")
        self.agent = lead

    @staticmethod
    def _build_prompt(state: ReportState) -> str:
        """Render the user prompt from the request fields stored on *state*."""
        requirement = state.requirement
        report_type = state.report_type
        # Optional fields fall back to a visible placeholder.
        extra_data = state.extra_data or "(none)"
        client_context = state.client_context or "(none)"
        return f"""\
## Client requirement
{requirement}

## Report type
{report_type}

## Additional data
{extra_data}

## Client context
{client_context}

Decompose into parallel research tracks with domain and language tags. Output JSON."""

    async def __call__(self, state: ReportState) -> ReportState:
        """Ask the lead model for a research plan and store it on *state*.

        Returns:
            The same *state*, with ``decomposition`` set to the parsed reply
            and the node logged as RUNNING and then COMPLETED.
        """
        node = "decompose"
        state.current_node = node
        state.log_node(node, NodeStatus.RUNNING)

        plan = await self.agent.call_llm_json(
            self._build_prompt(state),
            system=self._SYSTEM_PROMPT,
        )
        state.decomposition = plan

        track_count = len(plan.get("tracks", []))
        state.log_node(node, NodeStatus.COMPLETED, f"{track_count} tracks")
        return state
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Node: parallel_research — domain-aware parallel execution
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class ParallelResearchNode:
    """Run the planned research tracks concurrently, one sub-agent per track.

    Each track is researched by a ``ResearcherAgent`` built with the model
    that ``settings.model_for_domain`` returns for the track's domain. At most
    ``MAX_CONCURRENT`` tracks are in flight at once. A failing track is
    recorded on its own ``SubtaskResult`` and does not cancel its siblings.
    """

    # Upper bound on simultaneously running research tracks.
    MAX_CONCURRENT = 5

    @staticmethod
    def _resolve_domain(raw: Any) -> ContentDomain:
        """Map the planner's ``domain`` tag to a ``ContentDomain``.

        Unknown or missing tags fall back to ``ContentDomain.GLOBAL``. Value
        lookup is used instead of a membership test against the enum members
        so the result does not depend on the enum comparing equal to ``str``.
        """
        try:
            return ContentDomain(raw)
        except ValueError:
            return ContentDomain.GLOBAL

    async def _run_one(self, track: dict[str, Any]) -> SubtaskResult:
        """Research a single track and capture its outcome.

        Args:
            track: One entry of the decomposition's ``tracks`` list. Reads the
                ``title``, ``domain``, ``native_language``, ``focus`` and
                ``prompt`` keys; only ``prompt`` is accessed without a default.

        Returns:
            A ``SubtaskResult`` whose status is COMPLETED (with ``content``
            set) or FAILED (with ``error`` set). Exceptions raised while
            researching are logged and recorded, not propagated, so one bad
            track cannot abort the surrounding ``gather``.
        """
        domain = self._resolve_domain(track.get("domain", "global"))
        native_lang = track.get("native_language", "en")
        title = track.get("title")

        result = SubtaskResult(
            description=track.get("title", ""),
            domain=domain,
            native_language=native_lang,
        )
        result.status = NodeStatus.RUNNING
        result.started_at = datetime.now()

        try:
            # Select model based on domain
            model = settings.model_for_domain(domain.value)
            agent = ResearcherAgent(model=model, language=native_lang)

            logger.info(
                "[parallel_research] track '%s' → domain=%s, lang=%s, model=%s",
                title, domain.value, native_lang, model,
            )

            research = await agent.run({
                "requirement": track["prompt"],
                "report_type": track.get("focus", ""),
                "extra_data": "",
            })
            result.content = research.get("research", {})
            result.status = NodeStatus.COMPLETED
        except Exception as e:
            # Boundary handler: the failure is kept on the result for reporting.
            result.error = str(e)
            result.status = NodeStatus.FAILED
            logger.exception("Research track '%s' failed", title)
        finally:
            result.completed_at = datetime.now()

        return result

    async def __call__(self, state: ReportState) -> ReportState:
        """Run every track of ``state.decomposition`` and store the results.

        Returns:
            The same *state*. ``research_results`` holds one ``SubtaskResult``
            per track. When there are no tracks, or when no track succeeds,
            the node is logged as FAILED and ``state.error`` is set; otherwise
            it is logged as COMPLETED with a success/domain summary.
        """
        state.current_node = "parallel_research"
        state.log_node("parallel_research", NodeStatus.RUNNING)

        # Tolerate a missing decomposition or a null "tracks" value.
        tracks = (state.decomposition or {}).get("tracks") or []
        if not tracks:
            state.log_node("parallel_research", NodeStatus.FAILED, "no tracks")
            state.error = "Decomposition produced no research tracks"
            return state

        semaphore = asyncio.Semaphore(self.MAX_CONCURRENT)

        async def bounded(track: dict[str, Any]) -> SubtaskResult:
            async with semaphore:
                return await self._run_one(track)

        logger.info("[parallel_research] launching %d tracks concurrently", len(tracks))
        results = await asyncio.gather(*(bounded(t) for t in tracks))
        state.research_results = list(results)

        succeeded = sum(1 for r in results if r.status == NodeStatus.COMPLETED)
        # Plain dict (not defaultdict) so the summary text keeps a dict repr.
        domains: dict[str, list[str]] = {}
        for r in results:
            domains.setdefault(r.domain.value, []).append(r.native_language)
        summary = f"{succeeded}/{len(tracks)} ok, domains={domains}"

        if succeeded == 0:
            # Nothing to synthesize: report failure like the "no tracks" branch
            # instead of claiming the node completed.
            state.log_node("parallel_research", NodeStatus.FAILED, summary)
            state.error = "All research tracks failed"
            return state

        state.log_node("parallel_research", NodeStatus.COMPLETED, summary)
        return state
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Node: write — synthesize research into primary-language draft
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class WriteNode:
    """Writer node: turns the completed research tracks into a report draft.

    On a revision pass the previous review's issues are forwarded to the
    writer as ``revision_feedback``.
    """

    def __init__(self):
        self.agent = WriterAgent()

    @staticmethod
    def _revision_feedback(state: ReportState) -> str:
        """Return the review issues as a markdown list, or "" on the first pass."""
        if not (state.revision_count > 0 and state.review):
            return ""
        header = f"\n\n## Review feedback (revision {state.revision_count})\n"
        bullets = [
            f"- [{issue.get('severity')}] {issue.get('description')} → {issue.get('suggestion')}\n"
            for issue in state.review.get("issues", [])
        ]
        return header + "".join(bullets)

    async def __call__(self, state: ReportState) -> ReportState:
        """Run the writer agent and store its ``draft`` on *state*.

        Returns:
            The same *state*, with ``draft`` replaced by the writer's output
            (an empty dict when the reply carries no ``draft`` key).
        """
        node = "write"
        state.current_node = node
        state.log_node(node, NodeStatus.RUNNING)

        # Only tracks that finished successfully are handed to the writer.
        completed_tracks = [
            {
                "track": r.description,
                "domain": r.domain.value,
                "native_language": r.native_language,
                "findings": r.content,
            }
            for r in state.research_results
            if r.status == NodeStatus.COMPLETED
        ]

        plan = state.decomposition
        guide = plan.get("synthesis_guide", "")
        feedback = self._revision_feedback(state)

        payload = {
            "requirement": state.requirement,
            "research": {
                "title_en": plan.get("title_en", ""),
                "title_zh": plan.get("title_zh", ""),
                "methodology": plan.get("methodology", ""),
                "tracks": completed_tracks,
                "synthesis_guide": guide,
            },
            "revision_feedback": feedback,
        }
        outcome = await self.agent.run(payload)

        state.draft = outcome.get("draft", {})
        state.log_node(node, NodeStatus.COMPLETED)
        return state
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Node: translate — produce the other language version
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TranslateNode:
    """Translate the primary-language draft into the other language (en ↔ zh).

    The draft's primary language is inferred from its ``title``: a title that
    contains CJK ideographs means the draft is Chinese and is translated to
    English; otherwise it is treated as English and translated to Chinese.
    """

    def __init__(self):
        self.agent = BaseAgent()
        self.agent.name = "translator"
        self.agent.model = settings.model_for_domain("translation")

    @staticmethod
    def _has_cjk(text: Any) -> bool:
        """Return True when *text* is a str with a character in U+4E00–U+9FFF.

        Non-string values (e.g. a missing or null title) count as "no CJK"
        instead of raising.
        """
        if not isinstance(text, str):
            return False
        return any("\u4e00" <= ch <= "\u9fff" for ch in text)

    @staticmethod
    def _wants_language(output_languages: Any, code: str) -> bool:
        """Return True when *code* ("en" / "zh") is among the requested outputs.

        A bare string keeps plain substring semantics; for a collection, a tag
        matches when it starts with *code*, so regional variants such as
        "zh-CN" are accepted. ``None`` or an empty collection matches nothing.
        """
        if isinstance(output_languages, str):
            return code in output_languages.lower()
        return any(
            str(tag).lower().startswith(code) for tag in (output_languages or ())
        )

    async def __call__(self, state: ReportState) -> ReportState:
        """Produce ``state.draft_translated`` when the target language is requested.

        The node is logged as COMPLETED with detail "skipped" when there is no
        draft or when the language the draft would be translated *into* is not
        in ``state.output_languages``.

        Returns:
            The same *state*; ``draft_translated`` is set only when a
            translation was actually performed.
        """
        state.current_node = "translate"
        state.log_node("translate", NodeStatus.RUNNING)

        if not state.draft:
            state.log_node("translate", NodeStatus.COMPLETED, "skipped")
            return state

        # Detect primary language of draft
        is_chinese_primary = self._has_cjk(state.draft.get("title", ""))

        if is_chinese_primary:
            target_lang = "English"
            source_lang = "Chinese"
            target_code = "en"
        else:
            target_lang = "Chinese (Simplified)"
            source_lang = "English"
            target_code = "zh"

        # Gate on the language we would translate into, not on "en"
        # unconditionally: an English draft must still yield a Chinese version
        # when only "zh" is requested, and must not when only "en" is.
        if not self._wants_language(state.output_languages, target_code):
            state.log_node("translate", NodeStatus.COMPLETED, "skipped")
            return state

        draft_json = json.dumps(state.draft, ensure_ascii=False, indent=2)

        # The quadruple braces render as literal "{{CHART:...}}" markers.
        system = f"""\
You are a world-class {source_lang} → {target_lang} translator specializing in
consulting and business reports.

Translation principles:
1. ACCURACY over fluency — every data point, percentage, and proper noun must be correct
2. Professional terminology — use standard {target_lang} business/industry terms
3. Preserve structure — keep the exact same JSON structure, only translate text values
4. Cultural adaptation — adjust phrasing for the target audience (not word-for-word)
5. Keep {{{{CHART:...}}}} and {{{{TABLE:...}}}} markers, translate their descriptions

Output the translated JSON with the exact same structure."""

        prompt = f"""\
Translate this consulting report from {source_lang} to {target_lang}.

{draft_json}

Output the translated JSON."""

        translated = await self.agent.call_llm_json(prompt, system=system, max_tokens=8192)
        state.draft_translated = translated
        state.log_node("translate", NodeStatus.COMPLETED,
                       f"{source_lang} → {target_lang}")
        return state
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Node: data — generate charts and tables
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class DataNode:
    """Data node: asks the ``DataAgent`` for the assets that accompany the draft."""

    def __init__(self):
        self.agent = DataAgent()

    async def __call__(self, state: ReportState) -> ReportState:
        """Run the data agent on the current draft and store its ``data_assets``.

        Returns:
            The same *state*, with ``data_assets`` set to the agent's output
            (an empty dict when the reply carries no ``data_assets`` key).
        """
        node = "data"
        state.current_node = node
        state.log_node(node, NodeStatus.RUNNING)

        payload = {
            "draft": state.draft,
            "extra_data": state.extra_data,
        }
        outcome = await self.agent.run(payload)

        state.data_assets = outcome.get("data_assets", {})
        state.log_node(node, NodeStatus.COMPLETED)
        return state
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Node: review — bilingual quality check
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class ReviewNode:
    """Review node: hands both draft versions to the ``ReviewerAgent``."""

    def __init__(self):
        self.agent = ReviewerAgent()

    async def __call__(self, state: ReportState) -> ReportState:
        """Run the reviewer and store its ``review`` on *state*.

        The reviewer receives the primary draft, the translated draft and the
        decomposition (passed under the ``research`` key).

        Returns:
            The same *state*, with ``review`` set; the verdict (or "?" when
            absent) is included in the completion log entry.
        """
        node = "review"
        state.current_node = node
        state.log_node(node, NodeStatus.RUNNING)

        payload = {
            "draft": state.draft,
            "draft_translated": state.draft_translated,
            "research": state.decomposition,
        }
        outcome = await self.agent.run(payload)

        state.review = outcome.get("review", {})
        verdict = state.review.get('verdict', '?')
        state.log_node(node, NodeStatus.COMPLETED, f"verdict={verdict}")
        return state
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Node: format — render bilingual output files
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class FormatNode:
    """Format node: renders the primary draft and, when present, the translated one.

    Output goes under ``settings.output_dir / state.id`` in the ``primary``
    and ``translated`` sub-directories respectively.
    """

    def __init__(self):
        self.agent = FormatterAgent()

    async def _render(self, state: ReportState, draft: Any, subdir: str) -> Any:
        """Render one draft version into *subdir* and return its generated files."""
        outcome = await self.agent.run({
            "draft": draft,
            "data_assets": state.data_assets,
            "output_dir": str(settings.output_dir / state.id / subdir),
            "output_formats": state.output_formats,
        })
        return outcome.get("generated_files", [])

    async def __call__(self, state: ReportState) -> ReportState:
        """Render every available draft version and record the produced files.

        Returns:
            The same *state*, with ``generated_files`` listing the files of
            the primary version followed by those of the translated version.
        """
        node = "format"
        state.current_node = node
        state.log_node(node, NodeStatus.RUNNING)

        produced = []

        # Primary version
        produced.extend(await self._render(state, state.draft, "primary"))

        # Translated version (if available)
        if state.draft_translated:
            produced.extend(
                await self._render(state, state.draft_translated, "translated")
            )

        state.generated_files = produced
        state.log_node(node, NodeStatus.COMPLETED, f"{len(produced)} files")
        return state
|
||||
Reference in New Issue
Block a user