# 20260327-c863ce53/app/agents/researcher.py

"""Researcher Agent — domain-aware, bilingual research."""

from __future__ import annotations

from typing import Any

from .base import BaseAgent
from app.config import settings

# English system prompt. Casts the model as an industry analyst, lists the
# quality requirements for the brief, and spells out the JSON object the model
# is asked to return. ResearcherAgent uses it for every language except "zh".
SYSTEM_EN = """\
You are a senior industry analyst at a top-tier consulting firm.
Your task is to produce a thorough research brief based on the given instructions.

Requirements:
1. Be specific — cite concrete data points, market sizes, growth rates, company names
2. Be structured — organize findings with clear headings and logical flow
3. Be analytical — don't just list facts, provide insights and implications
4. Flag data gaps — explicitly note where data is uncertain or unavailable

Output (JSON):
{
  "title": "Research brief title",
  "executive_summary": "2-3 sentence summary of key findings",
  "sections": [
    {
      "heading": "Section heading",
      "content": "Detailed findings (Markdown)",
      "data_points": ["key data points extracted"],
      "sources_quality": "high|medium|low — how confident are you in the data"
    }
  ],
  "data_gaps": ["areas where data is insufficient or uncertain"],
  "key_insights": ["top 3-5 non-obvious insights"]
}"""

# Chinese system prompt, used by ResearcherAgent when language == "zh".
# It requests the same JSON keys as SYSTEM_EN (title, executive_summary,
# sections, data_gaps, key_insights); only the instructional text is Chinese.
SYSTEM_ZH = """\
你是一位顶级咨询公司的资深行业分析师。
你的任务是根据给定的指令，输出一份深度研究简报。

要求：
1. 具体——引用具体的数据点、市场规模、增长率、企业名称
2. 结构化——用清晰的标题和逻辑流组织发现
3. 有分析深度——不要只罗列事实，要提供洞察和含义
4. 标注数据缺口——明确指出数据不确定或不可获取的地方

输出（JSON）：
{
  "title": "研究简报标题",
  "executive_summary": "核心发现的2-3句总结",
  "sections": [
    {
      "heading": "章节标题",
      "content": "详细发现（Markdown格式）",
      "data_points": ["提取的关键数据点"],
      "sources_quality": "high|medium|low — 对数据的置信度"
    }
  ],
  "data_gaps": ["数据不充分或不确定的领域"],
  "key_insights": ["3-5条非显而易见的洞察"]
}"""


class ResearcherAgent(BaseAgent):
    """Agent that asks the LLM for a structured research brief.

    The prompt language is fixed at construction time: ``"zh"`` selects the
    Chinese system prompt and user-prompt template, and every other value
    selects the English ones.
    """

    name = "researcher"
    description = "域感知研究 — 根据领域选择最优模型和语言"

    def __init__(self, model: str | None = None, language: str = "en"):
        """Store the language and pick the matching system prompt.

        Args:
            model: Forwarded unchanged to ``BaseAgent.__init__``.
            language: ``"zh"`` for Chinese prompts; anything else means
                English.
        """
        super().__init__(model=model)
        self.language = language
        if language == "zh":
            self.system_prompt = SYSTEM_ZH
        else:
            self.system_prompt = SYSTEM_EN

    async def run(self, context: dict[str, Any]) -> dict[str, Any]:
        """Render the user prompt from *context* and request a JSON brief.

        Args:
            context: Must contain ``"requirement"``. ``"report_type"`` and
                ``"extra_data"`` are optional and default to an empty string.

        Returns:
            ``{"research": <value returned by call_llm_json>}``.

        Raises:
            KeyError: If ``context`` has no ``"requirement"`` entry.
        """
        instructions = context["requirement"]
        focus = context.get("report_type", "")
        supplement = context.get("extra_data", "")

        # Each template is assembled from adjacent literals, one per output
        # line, so the rendered text is easy to compare with the prompt sent.
        if self.language == "zh":
            template = (
                "## 研究指令\n"
                "{instructions}\n"
                "\n"
                "## 研究方向\n"
                "{focus}\n"
                "\n"
                "## 补充数据\n"
                "{supplement}\n"
                "\n"
                "请输出研究简报 JSON。"
            )
            placeholder = "（无）"
        else:
            template = (
                "## Research instructions\n"
                "{instructions}\n"
                "\n"
                "## Research focus\n"
                "{focus}\n"
                "\n"
                "## Additional data\n"
                "{supplement}\n"
                "\n"
                "Output the research brief as JSON."
            )
            placeholder = "(none)"

        # Falsy supplementary data (empty string, None, ...) is replaced by
        # the language-specific "none" marker.
        prompt = template.format(
            instructions=instructions,
            focus=focus,
            supplement=supplement or placeholder,
        )

        brief = await self.call_llm_json(prompt, max_tokens=6144)
        return {"research": brief}