"""Graph nodes — each node is an async function: ReportState → ReportState. Node layout (v2 — domain-aware, bilingual): START │ ▼ [decompose] — Lead Agent 分解为并行研究轨道,每轨标注 domain + language │ ▼ [parallel_research] — N 个子 Agent 并行,每个用最适合该领域的模型 │ global tracks → Claude/GPT (English) │ china tracks → DeepSeek/Qwen (Chinese) ▼ [write] — Writer 汇聚 → 生成主语言版本 │ ▼ [translate] — 高质量翻译 → 生成另一语言版本 │ ▼ [data] — Data Agent 生成图表/表格 │ ▼ [review] — Reviewer 审查(双语) │ ├─ pass → [format] │ └─ revise → [write] ▼ [format] — 输出双语版本文件 │ ▼ END """ from __future__ import annotations import asyncio import json import logging from datetime import datetime from typing import Any from app.agents.base import BaseAgent from app.agents.researcher import ResearcherAgent from app.agents.writer import WriterAgent from app.agents.data_agent import DataAgent from app.agents.reviewer import ReviewerAgent from app.agents.formatter import FormatterAgent from app.config import settings from .state import ReportState, SubtaskResult, NodeStatus, ContentDomain logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- # Node: decompose — Lead Agent decomposes into domain-tagged parallel tracks # --------------------------------------------------------------------------- class DecomposeNode: """Analyzes requirement and decomposes into domain-aware research tracks.""" def __init__(self): self.agent = BaseAgent() self.agent.name = "lead" self.agent.model = settings.model_for_domain("reasoning") async def __call__(self, state: ReportState) -> ReportState: state.current_node = "decompose" state.log_node("decompose", NodeStatus.RUNNING) system = """\ You are a senior consulting partner planning a global industry report. Your job is to decompose the client's requirement into 2-6 parallel research tracks. CRITICAL: Each track must be tagged with a content domain and native language: - domain: "global" → international markets, global competition, technology trends, overseas benchmarks → native_language: "en" (English sources are 10-100x richer for global analysis) - domain: "china" → Chinese domestic market, government policy, local competitors, China-specific data → native_language: "zh" (Chinese sources are authoritative for domestic analysis) The PRINCIPLE: whichever language has the richest professional literature for that topic should be the native language. The other language version will be translated later. Output (JSON): { "title_en": "English report title", "title_zh": "中文报告标题", "report_type": "report type", "tracks": [ { "title": "track title (in native language)", "domain": "global|china", "native_language": "en|zh", "focus": "research focus description", "prompt": "detailed research instructions (MUST be in the native_language)", "data_needs": ["required data/charts"] } ], "synthesis_guide": "How to merge all tracks into a coherent report (bilingual structure notes)", "methodology": "Analysis methodology" }""" prompt = f"""\ ## Client requirement {state.requirement} ## Report type {state.report_type} ## Additional data {state.extra_data or "(none)"} ## Client context {state.client_context or "(none)"} Decompose into parallel research tracks with domain and language tags. Output JSON.""" result = await self.agent.call_llm_json(prompt, system=system) state.decomposition = result state.log_node("decompose", NodeStatus.COMPLETED, f"{len(result.get('tracks', []))} tracks") return state # --------------------------------------------------------------------------- # Node: parallel_research — domain-aware parallel execution # --------------------------------------------------------------------------- class ParallelResearchNode: """Runs research subtasks in parallel, each using the optimal model for its domain.""" MAX_CONCURRENT = 5 async def _run_one(self, track: dict[str, Any]) -> SubtaskResult: domain_str = track.get("domain", "global") domain = ContentDomain(domain_str) if domain_str in ContentDomain.__members__.values() else ContentDomain.GLOBAL native_lang = track.get("native_language", "en") result = SubtaskResult( description=track.get("title", ""), domain=domain, native_language=native_lang, ) result.status = NodeStatus.RUNNING result.started_at = datetime.now() try: # Select model based on domain model = settings.model_for_domain(domain.value) agent = ResearcherAgent(model=model, language=native_lang) logger.info( f"[parallel_research] track '{track.get('title')}' " f"→ domain={domain.value}, lang={native_lang}, model={model}" ) research = await agent.run({ "requirement": track["prompt"], "report_type": track.get("focus", ""), "extra_data": "", }) result.content = research.get("research", {}) result.status = NodeStatus.COMPLETED except Exception as e: result.error = str(e) result.status = NodeStatus.FAILED logger.exception(f"Research track '{track.get('title')}' failed") finally: result.completed_at = datetime.now() return result async def __call__(self, state: ReportState) -> ReportState: state.current_node = "parallel_research" state.log_node("parallel_research", NodeStatus.RUNNING) tracks = state.decomposition.get("tracks", []) if not tracks: state.log_node("parallel_research", NodeStatus.FAILED, "no tracks") state.error = "Decomposition produced no research tracks" return state semaphore = asyncio.Semaphore(self.MAX_CONCURRENT) async def bounded(track): async with semaphore: return await self._run_one(track) logger.info(f"[parallel_research] launching {len(tracks)} tracks concurrently") results = await asyncio.gather(*[bounded(t) for t in tracks]) state.research_results = list(results) succeeded = sum(1 for r in results if r.status == NodeStatus.COMPLETED) domains = {} for r in results: domains.setdefault(r.domain.value, []).append(r.native_language) state.log_node("parallel_research", NodeStatus.COMPLETED, f"{succeeded}/{len(tracks)} ok, domains={domains}") return state # --------------------------------------------------------------------------- # Node: write — synthesize research into primary-language draft # --------------------------------------------------------------------------- class WriteNode: def __init__(self): self.agent = WriterAgent() async def __call__(self, state: ReportState) -> ReportState: state.current_node = "write" state.log_node("write", NodeStatus.RUNNING) research_merged = [] for r in state.research_results: if r.status == NodeStatus.COMPLETED: research_merged.append({ "track": r.description, "domain": r.domain.value, "native_language": r.native_language, "findings": r.content, }) synthesis_guide = state.decomposition.get("synthesis_guide", "") review_feedback = "" if state.revision_count > 0 and state.review: review_feedback = f"\n\n## Review feedback (revision {state.revision_count})\n" for issue in state.review.get("issues", []): review_feedback += f"- [{issue.get('severity')}] {issue.get('description')} → {issue.get('suggestion')}\n" result = await self.agent.run({ "requirement": state.requirement, "research": { "title_en": state.decomposition.get("title_en", ""), "title_zh": state.decomposition.get("title_zh", ""), "methodology": state.decomposition.get("methodology", ""), "tracks": research_merged, "synthesis_guide": synthesis_guide, }, "revision_feedback": review_feedback, }) state.draft = result.get("draft", {}) state.log_node("write", NodeStatus.COMPLETED) return state # --------------------------------------------------------------------------- # Node: translate — produce the other language version # --------------------------------------------------------------------------- class TranslateNode: """Translates the draft into the other language version.""" def __init__(self): self.agent = BaseAgent() self.agent.name = "translator" self.agent.model = settings.model_for_domain("translation") async def __call__(self, state: ReportState) -> ReportState: state.current_node = "translate" state.log_node("translate", NodeStatus.RUNNING) if not state.draft or "en" not in state.output_languages: state.log_node("translate", NodeStatus.COMPLETED, "skipped") return state draft_json = json.dumps(state.draft, ensure_ascii=False, indent=2) # Detect primary language of draft title = state.draft.get("title", "") is_chinese_primary = any('\u4e00' <= c <= '\u9fff' for c in title) if is_chinese_primary: target_lang = "English" source_lang = "Chinese" else: target_lang = "Chinese (Simplified)" source_lang = "English" system = f"""\ You are a world-class {source_lang} → {target_lang} translator specializing in consulting and business reports. Translation principles: 1. ACCURACY over fluency — every data point, percentage, and proper noun must be correct 2. Professional terminology — use standard {target_lang} business/industry terms 3. Preserve structure — keep the exact same JSON structure, only translate text values 4. Cultural adaptation — adjust phrasing for the target audience (not word-for-word) 5. Keep {{{{CHART:...}}}} and {{{{TABLE:...}}}} markers, translate their descriptions Output the translated JSON with the exact same structure.""" prompt = f"""\ Translate this consulting report from {source_lang} to {target_lang}. {draft_json} Output the translated JSON.""" translated = await self.agent.call_llm_json(prompt, system=system, max_tokens=8192) state.draft_translated = translated state.log_node("translate", NodeStatus.COMPLETED, f"{source_lang} → {target_lang}") return state # --------------------------------------------------------------------------- # Node: data — generate charts and tables # --------------------------------------------------------------------------- class DataNode: def __init__(self): self.agent = DataAgent() async def __call__(self, state: ReportState) -> ReportState: state.current_node = "data" state.log_node("data", NodeStatus.RUNNING) result = await self.agent.run({ "draft": state.draft, "extra_data": state.extra_data, }) state.data_assets = result.get("data_assets", {}) state.log_node("data", NodeStatus.COMPLETED) return state # --------------------------------------------------------------------------- # Node: review — bilingual quality check # --------------------------------------------------------------------------- class ReviewNode: def __init__(self): self.agent = ReviewerAgent() async def __call__(self, state: ReportState) -> ReportState: state.current_node = "review" state.log_node("review", NodeStatus.RUNNING) result = await self.agent.run({ "draft": state.draft, "draft_translated": state.draft_translated, "research": state.decomposition, }) state.review = result.get("review", {}) state.log_node("review", NodeStatus.COMPLETED, f"verdict={state.review.get('verdict', '?')}") return state # --------------------------------------------------------------------------- # Node: format — render bilingual output files # --------------------------------------------------------------------------- class FormatNode: def __init__(self): self.agent = FormatterAgent() async def __call__(self, state: ReportState) -> ReportState: state.current_node = "format" state.log_node("format", NodeStatus.RUNNING) all_files = [] # Primary version result = await self.agent.run({ "draft": state.draft, "data_assets": state.data_assets, "output_dir": str(settings.output_dir / state.id / "primary"), "output_formats": state.output_formats, }) all_files.extend(result.get("generated_files", [])) # Translated version (if available) if state.draft_translated: result_tr = await self.agent.run({ "draft": state.draft_translated, "data_assets": state.data_assets, "output_dir": str(settings.output_dir / state.id / "translated"), "output_formats": state.output_formats, }) all_files.extend(result_tr.get("generated_files", [])) state.generated_files = all_files state.log_node("format", NodeStatus.COMPLETED, f"{len(all_files)} files") return state