init repo
This commit is contained in:
15
app/agents/__init__.py
Normal file
15
app/agents/__init__.py
Normal file
@@ -0,0 +1,15 @@
|
||||
from .base import BaseAgent
|
||||
from .researcher import ResearcherAgent
|
||||
from .writer import WriterAgent
|
||||
from .data_agent import DataAgent
|
||||
from .reviewer import ReviewerAgent
|
||||
from .formatter import FormatterAgent
|
||||
|
||||
__all__ = [
|
||||
"BaseAgent",
|
||||
"ResearcherAgent",
|
||||
"WriterAgent",
|
||||
"DataAgent",
|
||||
"ReviewerAgent",
|
||||
"FormatterAgent",
|
||||
]
|
||||
166
app/agents/base.py
Normal file
166
app/agents/base.py
Normal file
@@ -0,0 +1,166 @@
|
||||
"""Base agent with LLM calling via litellm."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
import litellm
|
||||
|
||||
from app.config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Disable litellm telemetry
|
||||
litellm.telemetry = False
|
||||
|
||||
|
||||
class BaseAgent:
    """Base class for all pipeline agents.

    Subclasses set the class attributes below and override :meth:`run`.
    """

    name: str = "base"
    description: str = ""
    system_prompt: str = ""
    model: str = ""  # empty = use default from config

    def __init__(self, model: str | None = None):
        """Optionally pin this instance to *model* instead of the class default."""
        if model:
            self.model = model

    def get_model(self) -> str:
        """Return the instance/class model, or ``settings.llm_model`` if unset."""
        return self.model or settings.llm_model

    async def call_llm(
        self,
        prompt: str,
        *,
        system: str | None = None,
        temperature: float = 0.3,
        max_tokens: int = 4096,
        response_format: dict | None = None,
    ) -> str:
        """Call the LLM via litellm and return the text of the first choice.

        Args:
            prompt: User message content.
            system: System prompt; falls back to ``self.system_prompt``.
            temperature: Sampling temperature forwarded to litellm.
            max_tokens: Completion token limit forwarded to litellm.
            response_format: Optional response-format dict forwarded as-is.

        Returns:
            The message content, or ``""`` when the provider returns no content.
        """
        messages = []
        sys_prompt = system or self.system_prompt
        if sys_prompt:
            messages.append({"role": "system", "content": sys_prompt})
        messages.append({"role": "user", "content": prompt})

        model = self.get_model()
        kwargs: dict[str, Any] = {
            "model": model,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
        if settings.llm_api_key:
            kwargs["api_key"] = settings.llm_api_key
        if settings.llm_api_base:
            kwargs["api_base"] = settings.llm_api_base
        if response_format:
            kwargs["response_format"] = response_format

        logger.info("[%s] calling %s", self.name, model)
        response = await litellm.acompletion(**kwargs)
        # message.content can be None; normalise so the declared ``str``
        # return type holds and len() below cannot raise.
        content = response.choices[0].message.content or ""
        logger.info("[%s] got %d chars", self.name, len(content))
        return content

    @staticmethod
    def _strip_code_fence(raw: str) -> str:
        """Remove a surrounding Markdown code fence (three backticks) from *raw*."""
        text = raw.strip()
        if text.startswith("```"):
            first_nl = text.find("\n")
            if first_nl != -1:
                text = text[first_nl + 1:]
            if text.endswith("```"):
                text = text[: text.rfind("```")]
            text = text.strip()
        return text

    @staticmethod
    def _escape_control_chars(s: str) -> str:
        """Escape raw control characters that appear inside JSON string values.

        Best-effort repair for model output containing literal newlines/tabs
        inside strings. Characters outside string literals are left untouched.
        """
        short_escapes = {"\n": "\\n", "\r": "\\r", "\t": "\\t"}
        result = []
        in_string = False
        escape = False
        for ch in s:
            if escape:
                # Character following a backslash is copied verbatim.
                result.append(ch)
                escape = False
            elif ch == "\\":
                result.append(ch)
                escape = True
            elif ch == '"':
                in_string = not in_string
                result.append(ch)
            elif in_string and ord(ch) < 32:
                result.append(short_escapes.get(ch) or f"\\u{ord(ch):04x}")
            else:
                result.append(ch)
        return "".join(result)

    async def call_llm_json(self, prompt: str, **kwargs) -> dict:
        """Call the LLM in JSON mode and parse the reply.

        Parsing strategies, in order: the reply as-is (code fence stripped);
        the reply with control characters inside strings escaped; the first
        JSON object found in the reply (ignoring trailing commentary); the
        optional ``json_repair`` package.

        Args:
            prompt: User message content.
            **kwargs: Forwarded to :meth:`call_llm`.

        Returns:
            The parsed JSON value.

        Raises:
            json.JSONDecodeError: If no strategy yields valid JSON.
        """
        raw = await self.call_llm(
            prompt,
            response_format={"type": "json_object"},
            **kwargs,
        )
        text = self._strip_code_fence(raw)

        try:
            return json.loads(text)
        except json.JSONDecodeError:
            pass

        cleaned = self._escape_control_chars(text)
        try:
            return json.loads(cleaned)
        except json.JSONDecodeError:
            pass

        # Search the *cleaned* text: escaping may have shifted offsets, so an
        # index taken from ``text`` would not be valid here.
        start = cleaned.find("{")
        if start == -1:
            raise json.JSONDecodeError("No JSON object found", text, 0)
        try:
            # raw_decode stops at the end of the first complete value, so
            # trailing commentary is ignored and braces inside string values
            # cannot confuse the extraction.
            obj, _end = json.JSONDecoder().raw_decode(cleaned, start)
            return obj
        except json.JSONDecodeError:
            pass

        # Last resort: optional third-party repair library.
        try:
            import json_repair

            return json_repair.loads(text)
        except Exception as exc:  # includes ImportError when not installed
            raise json.JSONDecodeError(
                "Failed to parse JSON after multiple attempts", text, 0
            ) from exc

    async def run(self, context: dict[str, Any]) -> dict[str, Any]:
        """Execute this agent's task. Override in subclasses.

        Args:
            context: Shared pipeline context (accumulated by previous agents).

        Returns:
            Dict of new keys to merge into context.
        """
        raise NotImplementedError
|
||||
78
app/agents/data_agent.py
Normal file
78
app/agents/data_agent.py
Normal file
@@ -0,0 +1,78 @@
|
||||
"""Data Agent — processes data, generates chart specs and table data."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
from .base import BaseAgent
|
||||
from app.config import settings
|
||||
|
||||
|
||||
class DataAgent(BaseAgent):
    """Agent that turns the draft's chart/table requests into concrete data.

    Reads ``context["draft"]`` (and optional ``context["extra_data"]``) and
    returns ``{"data_assets": {"charts": [...], "tables": [...]}}``.
    """

    name = "data"
    description = "处理数据、生成图表规格和表格数据"
    system_prompt = """\
你是一位数据分析专家。你的任务是根据报告草稿中标注的图表和表格需求,
生成具体的数据和图表规格。

输出要求(JSON 格式):
{
"charts": [
{
"id": "chart_1",
"title": "图表标题",
"type": "bar|line|pie|area|scatter",
"description": "图表说明",
"data": {
"labels": ["标签1", "标签2"],
"datasets": [
{"label": "数据集名", "data": [100, 200]}
]
}
}
],
"tables": [
{
"id": "table_1",
"title": "表格标题",
"headers": ["列1", "列2", "列3"],
"rows": [["数据1", "数据2", "数据3"]]
}
]
}"""

    def __init__(self):
        """Use the model configured for the "fast" domain."""
        super().__init__(model=settings.model_for_domain("fast"))

    async def run(self, context: dict[str, Any]) -> dict[str, Any]:
        """Generate chart specs and table data for the draft.

        Args:
            context: Pipeline context; must contain ``"draft"``.

        Returns:
            ``{"data_assets": {...}}`` where ``charts`` and ``tables`` are
            always lists (empty when the draft requests none).
        """
        draft = context["draft"]
        extra_data = context.get("extra_data", "")

        # Collect chart/table needs from the draft. ``or []`` guards against
        # explicit nulls in LLM-produced JSON, which would crash extend().
        chart_needs = []
        table_needs = []
        for ch in draft.get("chapters") or []:
            chart_needs.extend(ch.get("charts") or [])
            table_needs.extend(ch.get("tables") or [])

        if not chart_needs and not table_needs:
            return {"data_assets": {"charts": [], "tables": []}}

        title = draft.get("title", "")
        charts_json = json.dumps(chart_needs, ensure_ascii=False)
        tables_json = json.dumps(table_needs, ensure_ascii=False)
        data_source = extra_data or "(无额外数据,请根据行业常识生成合理的示例数据)"

        prompt = f"""\
## 报告标题
{title}

## 需要生成的图表
{charts_json}

## 需要生成的表格
{tables_json}

## 补充数据源
{data_source}

请为以上需求生成具体的图表规格和表格数据。输出 JSON。"""

        result = await self.call_llm_json(prompt)
        # The model may return a non-object or omit/null a key; normalise so
        # downstream consumers can rely on a dict with two lists.
        assets = result if isinstance(result, dict) else {}
        return {
            "data_assets": {
                **assets,
                "charts": assets.get("charts") or [],
                "tables": assets.get("tables") or [],
            }
        }
|
||||
669
app/agents/formatter.py
Normal file
669
app/agents/formatter.py
Normal file
@@ -0,0 +1,669 @@
|
||||
"""Formatter Agent — renders final report using Skills toolkit.
|
||||
|
||||
Skills integration:
|
||||
- docx: python-docx (baseline) + docx-js via Node.js (rich mode) + OOXML template editing
|
||||
- pptx: html2pptx.js via Node.js (visual slides) + python-pptx fallback
|
||||
- xlsx: openpyxl + recalc.py (formula recalculation via LibreOffice)
|
||||
- pdf: reportlab with CJK support + fpdf2 fallback
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from .base import BaseAgent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Skills root
|
||||
SKILLS_ROOT = Path.home() / "Projects/code/20260119-skills合集/anthropics_skills/skills"
# One sub-directory per output format under the skills root.
DOCX_SKILLS, PPTX_SKILLS, XLSX_SKILLS, PDF_SKILLS = (
    SKILLS_ROOT / fmt for fmt in ("docx", "pptx", "xlsx", "pdf")
)
|
||||
|
||||
|
||||
def _skills_available() -> dict[str, bool]:
    """Probe the filesystem and report which skill toolkits are present.

    Returns:
        Mapping of skill key to whether its marker file/directory exists.
    """
    docx_unpack = DOCX_SKILLS / "ooxml" / "scripts" / "unpack.py"
    pptx_unpack = PPTX_SKILLS / "ooxml" / "scripts" / "unpack.py"

    availability: dict[str, bool] = {}
    availability["docx_js"] = (DOCX_SKILLS / "docx-js.md").exists()
    availability["html2pptx"] = (PPTX_SKILLS / "scripts" / "html2pptx.js").exists()
    availability["recalc"] = (XLSX_SKILLS / "recalc.py").exists()
    availability["ooxml_docx"] = docx_unpack.exists()
    availability["ooxml_pptx"] = pptx_unpack.exists()
    availability["pdf_scripts"] = (PDF_SKILLS / "scripts").is_dir()
    return availability
|
||||
|
||||
|
||||
class FormatterAgent(BaseAgent):
|
||||
name = "formatter"
|
||||
description = "将报告渲染为 docx/pptx/xlsx/pdf,融合 Skills 能力"
|
||||
|
||||
def __init__(self):
    """Initialise the agent and record which skill toolkits exist on disk."""
    super().__init__()
    self.skills = _skills_available()
    enabled = []
    for skill, present in self.skills.items():
        if present:
            enabled.append(skill)
    logger.info(f"[formatter] available skills: {enabled}")
|
||||
|
||||
async def run(self, context: dict[str, Any]) -> dict[str, Any]:
    """Render the draft into every requested output format.

    Reads ``draft`` (required), ``data_assets``, ``output_dir``,
    ``output_formats`` and ``template_path`` from *context*. A failure in one
    format is logged and does not stop the remaining formats.

    Returns:
        ``{"generated_files": [...]}`` with the paths written, as strings.
    """
    draft = context["draft"]
    data_assets = context.get("data_assets", {})
    output_dir = Path(context.get("output_dir", "output"))
    formats = context.get("output_formats", ["docx"])
    template_path = context.get("template_path")  # optional: user-provided template

    output_dir.mkdir(parents=True, exist_ok=True)
    title = draft.get("title", "报告")
    generated_files = []

    for fmt in formats:
        try:
            if fmt == "docx":
                path = await self._render_docx(
                    draft, data_assets, output_dir, title, template_path
                )
            elif fmt == "pptx":
                path = await self._render_pptx(draft, data_assets, output_dir, title)
            elif fmt == "xlsx":
                path = await self._render_xlsx(data_assets, output_dir, title)
            elif fmt == "pdf":
                path = await self._render_pdf(draft, data_assets, output_dir, title)
            else:
                logger.warning(f"Unsupported format: {fmt}")
                continue
            generated_files.append(str(path))
            logger.info(f"[formatter] generated {path}")
        except Exception:
            # Best effort: keep going with the other formats.
            logger.exception(f"[formatter] failed to render {fmt}")

    return {"generated_files": generated_files}
|
||||
|
||||
# -----------------------------------------------------------------------
|
||||
# DOCX — python-docx baseline + OOXML template editing
|
||||
# -----------------------------------------------------------------------
|
||||
|
||||
async def _render_docx(
|
||||
self, draft: dict, data_assets: dict, output_dir: Path, title: str,
|
||||
template_path: str | None = None,
|
||||
) -> Path:
|
||||
if template_path and self.skills["ooxml_docx"]:
|
||||
return await self._render_docx_from_template(
|
||||
draft, data_assets, output_dir, title, Path(template_path)
|
||||
)
|
||||
return await self._render_docx_baseline(draft, data_assets, output_dir, title)
|
||||
|
||||
async def _render_docx_baseline(
    self, draft: dict, data_assets: dict, output_dir: Path, title: str
) -> Path:
    """Build the report as a DOCX file with python-docx.

    Layout: title, optional executive summary, one level-1 heading per
    chapter, then data tables, then one section per chart (description plus
    the chart data rendered as tables).

    Returns:
        Path of the saved ``<title>.docx`` inside *output_dir*.
    """
    from docx import Document
    from docx.enum.text import WD_ALIGN_PARAGRAPH
    from docx.oxml.ns import qn
    from docx.shared import Pt, RGBColor

    doc = Document()

    # -- Styles --
    normal = doc.styles["Normal"]
    normal.font.name = "微软雅黑"
    normal.font.size = Pt(11)
    # font.name only sets the Latin font slots; Word chooses the typeface for
    # CJK text from w:eastAsia, so set it explicitly.
    normal.element.rPr.rFonts.set(qn("w:eastAsia"), "微软雅黑")

    # Title
    heading = doc.add_heading(title, level=0)
    heading.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Executive summary
    if summary := draft.get("executive_summary"):
        doc.add_heading("执行摘要", level=1)
        run = doc.add_paragraph().add_run(summary)
        run.font.size = Pt(11)
        run.font.color.rgb = RGBColor(0x33, 0x33, 0x33)

    # Chapters. ``.get``/``or`` guards keep a missing or null field in
    # LLM-produced data from aborting the whole document.
    for chapter in draft.get("chapters") or []:
        doc.add_heading(chapter.get("title") or "", level=1)
        self._docx_render_markdown(doc, chapter.get("content") or "")

    # Tables from data assets
    for table_spec in data_assets.get("tables") or []:
        doc.add_heading(table_spec.get("title", "数据表"), level=2)
        self._docx_add_table(doc, table_spec)

    # Charts: textual placeholder plus the underlying data as tables
    for chart_spec in data_assets.get("charts") or []:
        doc.add_heading(chart_spec.get("title", "图表"), level=2)
        desc = chart_spec.get("description", "")
        chart_type = chart_spec.get("type") or ""
        doc.add_paragraph(f"[{chart_type.upper()} 图表] {desc}")

        chart_data = chart_spec.get("data") or {}
        if labels := chart_data.get("labels"):
            for ds in chart_data.get("datasets") or []:
                values = ds.get("data") or []
                self._docx_add_table(doc, {
                    "headers": ["项目", ds.get("label", "数据")],
                    "rows": [[str(lab), str(val)] for lab, val in zip(labels, values)],
                })

    path = output_dir / f"{title}.docx"
    doc.save(str(path))
    return path
|
||||
|
||||
async def _render_docx_from_template(
    self, draft: dict, data_assets: dict, output_dir: Path, title: str,
    template_path: Path,
) -> Path:
    """Produce a DOCX from a template via the OOXML unpack/pack scripts.

    The template is currently packed back unchanged (passthrough). If either
    script exits non-zero, the python-docx baseline renderer is used instead.

    Returns:
        Path of the written ``<title>.docx``.
    """
    scripts_dir = DOCX_SKILLS / "ooxml" / "scripts"
    unpack_script = scripts_dir / "unpack.py"
    pack_script = scripts_dir / "pack.py"

    with tempfile.TemporaryDirectory() as tmpdir:
        work_dir = Path(tmpdir) / "unpacked"

        # Unpack template. communicate() drains the pipes; wait() alone can
        # deadlock when the child fills a PIPE buffer.
        proc = await asyncio.create_subprocess_exec(
            "python3", str(unpack_script), str(template_path), str(work_dir),
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
        )
        await proc.communicate()
        if proc.returncode != 0:
            logger.warning("[formatter] OOXML unpack failed, falling back to baseline")
            return await self._render_docx_baseline(draft, data_assets, output_dir, title)

        # TODO: edit XML content in work_dir based on draft
        # For now, just pack back as-is (template passthrough)
        output_path = output_dir / f"{title}.docx"
        proc = await asyncio.create_subprocess_exec(
            "python3", str(pack_script), str(work_dir), str(output_path),
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
        )
        await proc.communicate()
        if proc.returncode != 0:
            # Do not report a path that may not exist.
            logger.warning("[formatter] OOXML pack failed, falling back to baseline")
            return await self._render_docx_baseline(draft, data_assets, output_dir, title)
        return output_path
|
||||
|
||||
def _docx_render_markdown(self, doc, content: str):
|
||||
"""Convert markdown-ish content to docx paragraphs."""
|
||||
from docx.shared import Pt
|
||||
|
||||
for block in content.split("\n\n"):
|
||||
block = block.strip()
|
||||
if not block:
|
||||
continue
|
||||
if block.startswith("#### "):
|
||||
doc.add_heading(block[5:], level=4)
|
||||
elif block.startswith("### "):
|
||||
doc.add_heading(block[4:], level=3)
|
||||
elif block.startswith("## "):
|
||||
doc.add_heading(block[3:], level=2)
|
||||
elif block.startswith("- ") or block.startswith("* "):
|
||||
# Bullet list
|
||||
for line in block.split("\n"):
|
||||
line = line.lstrip("- *").strip()
|
||||
if line:
|
||||
doc.add_paragraph(line, style="List Bullet")
|
||||
elif block.startswith("1. ") or block.startswith("1)"):
|
||||
# Numbered list
|
||||
for line in block.split("\n"):
|
||||
text = line.lstrip("0123456789.)) ").strip()
|
||||
if text:
|
||||
doc.add_paragraph(text, style="List Number")
|
||||
else:
|
||||
p = doc.add_paragraph(block)
|
||||
for run in p.runs:
|
||||
run.font.size = Pt(11)
|
||||
|
||||
def _docx_add_table(self, doc, table_spec: dict):
    """Add a formatted table to the document.

    Args:
        doc: python-docx document to append to.
        table_spec: Dict with ``headers`` (list) and ``rows`` (list of lists).
            Nothing is added when there are no headers. Cells beyond the
            header width are dropped; short rows leave trailing cells empty.
    """
    from docx.shared import Pt

    headers = table_spec.get("headers") or []
    rows = table_spec.get("rows") or []
    if not headers:
        return

    n_cols = len(headers)
    tbl = doc.add_table(rows=1 + len(rows), cols=n_cols)
    tbl.style = "Light Grid Accent 1"

    # Header row
    header_cells = tbl.rows[0].cells
    for i, h in enumerate(headers):
        cell = header_cells[i]
        cell.text = str(h)
        for p in cell.paragraphs:
            for run in p.runs:
                run.font.bold = True
                run.font.size = Pt(10)

    # Data rows. Truncate to the header width: a row longer than the header
    # would otherwise index past the last table column and raise IndexError.
    for r_idx, row in enumerate(rows, start=1):
        cells = tbl.rows[r_idx].cells
        for c_idx, cell_val in enumerate(row[:n_cols]):
            cells[c_idx].text = str(cell_val)
|
||||
|
||||
# -----------------------------------------------------------------------
|
||||
# PPTX — html2pptx.js (rich) or python-pptx (fallback)
|
||||
# -----------------------------------------------------------------------
|
||||
|
||||
async def _render_pptx(
|
||||
self, draft: dict, data_assets: dict, output_dir: Path, title: str
|
||||
) -> Path:
|
||||
if self.skills["html2pptx"]:
|
||||
try:
|
||||
return await self._render_pptx_html2pptx(draft, data_assets, output_dir, title)
|
||||
except Exception as e:
|
||||
logger.warning(f"[formatter] html2pptx failed ({e}), falling back to python-pptx")
|
||||
return await self._render_pptx_baseline(draft, data_assets, output_dir, title)
|
||||
|
||||
async def _render_pptx_html2pptx(
    self, draft: dict, data_assets: dict, output_dir: Path, title: str
) -> Path:
    """Generate PPTX using html2pptx.js skill for visual slides.

    Writes one HTML file per slide (title slide plus one per chapter) into a
    temporary directory, generates a Node.js driver script and runs it.

    Returns:
        Path of the ``<title>.pptx`` written by the Node script.

    Raises:
        RuntimeError: If the Node process exits with a non-zero status.
    """
    import html

    output_path = output_dir / f"{title}.pptx"

    with tempfile.TemporaryDirectory() as tmpdir:
        work = Path(tmpdir)

        # Report text is escaped before being placed in markup so that
        # characters such as "<" or "&" cannot break or inject HTML.
        safe_title = html.escape(str(title))
        safe_summary = html.escape((draft.get("executive_summary") or "")[:100])

        slides_html = []
        # Title slide
        slides_html.append(f"""<html><body style="width:720pt;height:405pt;display:flex;align-items:center;justify-content:center;flex-direction:column;background:linear-gradient(135deg,#1a1a2e,#16213e);color:white;font-family:sans-serif;">
<h1 style="font-size:36pt;margin:0;">{safe_title}</h1>
<p style="font-size:18pt;color:#aaa;margin-top:20pt;">{safe_summary}</p>
</body></html>""")

        # Chapter slides
        for ch in draft.get("chapters") or []:
            content_lines = (ch.get("content") or "")[:400].split("\n")
            bullets = "".join(
                f"<li>{html.escape(line.strip())}</li>"
                for line in content_lines
                if line.strip()
            )
            chapter_title = html.escape(str(ch.get("title") or ""))
            slides_html.append(f"""<html><body style="width:720pt;height:405pt;padding:40pt;font-family:sans-serif;background:#ffffff;">
<h2 style="font-size:28pt;color:#1a1a2e;border-bottom:2pt solid #e94560;padding-bottom:10pt;">{chapter_title}</h2>
<ul style="font-size:14pt;color:#333;line-height:1.8;">{bullets}</ul>
</body></html>""")

        # Write HTML files
        slide_files = []
        for i, slide in enumerate(slides_html):
            file_name = f"slide_{i}.html"
            (work / file_name).write_text(slide, encoding="utf-8")
            slide_files.append(file_name)

        # Write conversion script. Paths and the title are emitted with
        # json.dumps so quotes or backslashes in them yield valid JS string
        # literals instead of breaking the script.
        script = work / "convert.js"
        html2pptx_path = PPTX_SKILLS / "scripts" / "html2pptx.js"
        script.write_text(f"""\
const pptxgen = require('pptxgenjs');
const {{ html2pptx }} = require({json.dumps(str(html2pptx_path))});
const path = require('path');

async function main() {{
const pptx = new pptxgen();
pptx.layout = 'LAYOUT_16x9';
const files = {json.dumps(slide_files)};
for (const f of files) {{
await html2pptx(path.join({json.dumps(str(work))}, f), pptx);
}}
await pptx.writeFile({{ fileName: {json.dumps(str(output_path))} }});
}}
main().catch(e => {{ console.error(e); process.exit(1); }});
""", encoding="utf-8")

        proc = await asyncio.create_subprocess_exec(
            "node", str(script),
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=str(work),
        )
        _stdout, stderr = await proc.communicate()
        if proc.returncode != 0:
            raise RuntimeError(f"html2pptx failed: {stderr.decode()}")

    return output_path
|
||||
|
||||
async def _render_pptx_baseline(
    self, draft: dict, data_assets: dict, output_dir: Path, title: str
) -> Path:
    """Build a simple deck with python-pptx.

    One title slide, one bullet slide per chapter (at most 12 lines) and one
    table slide per data table (header plus at most 9 rows).

    Returns:
        Path of the saved ``<title>.pptx`` inside *output_dir*.
    """
    import re

    from pptx import Presentation
    from pptx.util import Inches, Pt

    # One leading markdown marker: heading hashes, a bullet, or a list number.
    # A regex is used because str.lstrip() strips a character *set* and would
    # also eat leading digits of the text itself (e.g. "2024年...").
    leading_marker = re.compile(r"^\s*(?:#{1,6}\s+|[-*]\s+|\d+(?:\.\s+|\)\s*))")

    prs = Presentation()

    # Title slide
    slide = prs.slides.add_slide(prs.slide_layouts[0])
    slide.shapes.title.text = title
    if len(slide.placeholders) > 1:
        slide.placeholders[1].text = (draft.get("executive_summary") or "")[:200]

    # Chapter slides
    for chapter in draft.get("chapters") or []:
        slide = prs.slides.add_slide(prs.slide_layouts[1])
        slide.shapes.title.text = chapter.get("title") or ""
        tf = slide.placeholders[1].text_frame
        tf.clear()

        content = chapter.get("content") or ""
        lines = [ln.strip() for ln in content.split("\n") if ln.strip()]
        for idx, line in enumerate(lines[:12]):  # max 12 bullets per slide
            # clear() leaves one empty paragraph behind; reuse it for the
            # first line so the slide does not start with a blank bullet.
            p = tf.paragraphs[0] if idx == 0 else tf.add_paragraph()
            clean = leading_marker.sub("", line, count=1).replace("**", "").strip()
            p.text = clean
            p.font.size = Pt(14)
            p.space_after = Pt(4)

    # Data table slides
    for table_spec in data_assets.get("tables") or []:
        # Layout 5 is used because the code below needs a title placeholder.
        slide = prs.slides.add_slide(prs.slide_layouts[5])
        slide.shapes.title.text = table_spec.get("title", "数据表")

        headers = table_spec.get("headers") or []
        rows = table_spec.get("rows") or []
        if headers and rows:
            n_rows = min(len(rows) + 1, 10)  # limit rows per slide
            n_cols = len(headers)
            tbl = slide.shapes.add_table(
                n_rows, n_cols,
                Inches(0.5), Inches(1.5), Inches(9), Inches(4.5),
            ).table

            for i, h in enumerate(headers):
                tbl.cell(0, i).text = str(h)
            for r_idx, row in enumerate(rows[:n_rows - 1], start=1):
                for c_idx, val in enumerate(row[:n_cols]):
                    tbl.cell(r_idx, c_idx).text = str(val)

    path = output_dir / f"{title}.pptx"
    prs.save(str(path))
    return path
|
||||
|
||||
# -----------------------------------------------------------------------
|
||||
# XLSX — openpyxl + recalc.py (formula recalculation)
|
||||
# -----------------------------------------------------------------------
|
||||
|
||||
async def _render_xlsx(
    self, data_assets: dict, output_dir: Path, title: str
) -> Path:
    """Write data tables and chart data to an XLSX workbook with openpyxl.

    All tables go onto the first sheet, stacked vertically; each chart gets
    its own sheet holding its labels and datasets. Tables with two or more
    data rows get a SUM row for columns whose first data cell is numeric.
    If any formula was written and the recalc skill is available, the
    workbook is recalculated afterwards.

    Returns:
        Path of the saved ``<title>.xlsx`` inside *output_dir*.
    """
    import re

    from openpyxl import Workbook
    from openpyxl.styles import Alignment, Border, Font, PatternFill, Side
    from openpyxl.utils import get_column_letter

    wb = Workbook()
    ws = wb.active
    ws.title = "数据总览"

    # Professional styling
    header_font = Font(bold=True, size=11, color="FFFFFF")
    header_fill = PatternFill(start_color="1A1A2E", end_color="1A1A2E", fill_type="solid")
    title_font = Font(bold=True, size=14, color="1A1A2E")
    bold_font = Font(bold=True)
    edge = Side(style="thin", color="CCCCCC")
    thin_border = Border(left=edge, right=edge, top=edge, bottom=edge)

    current_row = 1
    has_formulas = False
    # Widest requirement seen per column, so a later, narrower table does not
    # shrink a column that an earlier table needs wider.
    col_widths: dict[int, int] = {}

    for table_spec in data_assets.get("tables") or []:
        table_top = current_row
        ws.cell(row=current_row, column=1, value=table_spec.get("title", "")).font = title_font
        current_row += 1

        headers = table_spec.get("headers") or []
        rows = table_spec.get("rows") or []

        if headers:
            # Header row with styling
            for col_idx, h in enumerate(headers, 1):
                cell = ws.cell(row=current_row, column=col_idx, value=h)
                cell.font = header_font
                cell.fill = header_fill
                cell.alignment = Alignment(horizontal="center")
                cell.border = thin_border
            current_row += 1

            # Data rows
            data_start = current_row
            for row_data in rows:
                for col_idx, val in enumerate(row_data, 1):
                    cell = ws.cell(row=current_row, column=col_idx, value=val)
                    cell.border = thin_border
                    # Store numeric-looking strings ("1,234") as numbers.
                    if isinstance(val, str):
                        try:
                            cell.value = float(val.replace(",", ""))
                        except ValueError:
                            pass
                current_row += 1

            # Auto-sum row for numeric columns (only with 2+ data rows)
            data_end = current_row - 1
            if data_end > data_start:
                for col_idx in range(1, len(headers) + 1):
                    col_letter = get_column_letter(col_idx)
                    first_value = ws.cell(row=data_start, column=col_idx).value
                    if isinstance(first_value, (int, float)):
                        cell = ws.cell(
                            row=current_row, column=col_idx,
                            value=f"=SUM({col_letter}{data_start}:{col_letter}{data_end})",
                        )
                        cell.font = bold_font
                        cell.border = thin_border
                        has_formulas = True
                    elif col_idx == 1:
                        cell = ws.cell(row=current_row, column=1, value="合计")
                        cell.font = bold_font
                        cell.border = thin_border
                current_row += 1

            # Auto-fit column widths over this table's rows (capped at 30)
            for col_idx in range(1, len(headers) + 1):
                longest = max(
                    len(str(ws.cell(row=r, column=col_idx).value or ""))
                    for r in range(table_top, current_row)
                )
                width = max(min(longest + 4, 30), col_widths.get(col_idx, 0))
                col_widths[col_idx] = width
                ws.column_dimensions[get_column_letter(col_idx)].width = width

        current_row += 2  # gap between tables

    # Chart data sheets
    for chart_spec in data_assets.get("charts") or []:
        # Sheet names may not contain \ / * ? : [ ] — replace them so a title
        # such as "2020/2024: 趋势" does not make create_sheet raise.
        raw_title = str(chart_spec.get("title") or "图表")
        sheet_title = re.sub(r"[\\/*?:\[\]]", "_", raw_title)[:31]
        chart_ws = wb.create_sheet(title=sheet_title)
        chart_ws.cell(row=1, column=1, value=chart_spec.get("title", "")).font = title_font

        chart_data = chart_spec.get("data") or {}
        labels = chart_data.get("labels") or []
        datasets = chart_data.get("datasets") or []

        # Headers: [项目, dataset 1, dataset 2, ...]
        chart_ws.cell(row=2, column=1, value="项目").font = Font(bold=True)
        for ds_idx, ds in enumerate(datasets, 2):
            chart_ws.cell(row=2, column=ds_idx, value=ds.get("label", "")).font = Font(bold=True)

        for r_idx, label in enumerate(labels, 3):
            chart_ws.cell(row=r_idx, column=1, value=label)
            for ds_idx, ds in enumerate(datasets, 2):
                series = ds.get("data") or []
                if r_idx - 3 < len(series):
                    chart_ws.cell(row=r_idx, column=ds_idx, value=series[r_idx - 3])

    path = output_dir / f"{title}.xlsx"
    wb.save(str(path))

    # Run recalc.py if we have formulas and the skill is available
    if has_formulas and self.skills["recalc"]:
        await self._xlsx_recalc(path)

    return path
|
||||
|
||||
async def _xlsx_recalc(self, path: Path):
    """Run the Skills ``recalc.py`` script on *path* (best effort).

    The script's stdout is parsed as JSON and its ``status`` is logged.
    Failures are logged as warnings and never raised to the caller.
    """
    recalc_script = XLSX_SKILLS / "recalc.py"
    logger.info(f"[formatter] running recalc.py on {path}")
    try:
        # Third argument "30" is passed through to the script unchanged;
        # presumably a timeout in seconds — verify against recalc.py.
        proc = await asyncio.create_subprocess_exec(
            "python3", str(recalc_script), str(path), "30",
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
        )
        stdout, stderr = await proc.communicate()
        if proc.returncode != 0:
            logger.warning(f"[formatter] recalc.py failed: {stderr.decode()[:200]}")
            return
        result = json.loads(stdout.decode())
        logger.info(f"[formatter] recalc result: {result.get('status')}")
    except Exception as e:
        logger.warning(f"[formatter] recalc.py error: {e}")
|
||||
|
||||
# -----------------------------------------------------------------------
|
||||
# PDF — reportlab with CJK support + fpdf2 fallback
|
||||
# -----------------------------------------------------------------------
|
||||
|
||||
async def _render_pdf(
|
||||
self, draft: dict, data_assets: dict, output_dir: Path, title: str
|
||||
) -> Path:
|
||||
try:
|
||||
return await self._render_pdf_reportlab(draft, data_assets, output_dir, title)
|
||||
except Exception as e:
|
||||
logger.warning(f"[formatter] reportlab failed ({e}), falling back to fpdf2")
|
||||
return await self._render_pdf_fpdf(draft, data_assets, output_dir, title)
|
||||
|
||||
async def _render_pdf_reportlab(
    self, draft: dict, data_assets: dict, output_dir: Path, title: str
) -> Path:
    """Generate PDF with reportlab — better CJK support and table rendering.

    Layout: title, optional executive summary, one page per chapter, then the
    data tables. The first CJK font found at a known system path is
    registered; otherwise Helvetica is used.

    Returns:
        Path of the saved ``<title>.pdf`` inside *output_dir*.
    """
    from xml.sax.saxutils import escape

    from reportlab.lib import colors
    from reportlab.lib.pagesizes import A4
    from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
    from reportlab.lib.units import mm
    from reportlab.pdfbase import pdfmetrics
    from reportlab.pdfbase.ttfonts import TTFont
    from reportlab.platypus import (
        PageBreak, Paragraph, SimpleDocTemplate, Spacer, Table, TableStyle,
    )

    def markup(text) -> str:
        # Paragraph parses its text as XML-like markup, so report text must be
        # escaped; newlines become explicit breaks to keep line structure.
        return escape(str(text)).replace("\n", "<br/>")

    # Try to register a CJK font
    cjk_font = "Helvetica"
    for font_path in [
        "/System/Library/Fonts/STHeiti Medium.ttc",
        "/System/Library/Fonts/PingFang.ttc",
        "/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc",
    ]:
        if Path(font_path).exists():
            try:
                pdfmetrics.registerFont(TTFont("CJK", font_path, subfontIndex=0))
                cjk_font = "CJK"
                break
            except Exception:
                continue

    path = output_dir / f"{title}.pdf"
    doc = SimpleDocTemplate(str(path), pagesize=A4,
                            topMargin=25*mm, bottomMargin=25*mm)

    styles = getSampleStyleSheet()
    styles.add(ParagraphStyle(
        name="CJKTitle", fontName=cjk_font, fontSize=22,
        spaceAfter=12, alignment=1,
    ))
    styles.add(ParagraphStyle(
        name="CJKHeading", fontName=cjk_font, fontSize=16,
        spaceAfter=8, spaceBefore=16, textColor=colors.HexColor("#1a1a2e"),
    ))
    styles.add(ParagraphStyle(
        name="CJKBody", fontName=cjk_font, fontSize=11,
        spaceAfter=6, leading=16,
    ))

    elements = []

    # Title
    elements.append(Paragraph(markup(title), styles["CJKTitle"]))
    elements.append(Spacer(1, 12))

    # Executive summary
    if summary := draft.get("executive_summary"):
        elements.append(Paragraph("执行摘要", styles["CJKHeading"]))
        elements.append(Paragraph(markup(summary), styles["CJKBody"]))
        elements.append(Spacer(1, 12))

    # Chapters
    for chapter in draft.get("chapters") or []:
        elements.append(PageBreak())
        elements.append(Paragraph(markup(chapter.get("title") or ""), styles["CJKHeading"]))
        for para in (chapter.get("content") or "").split("\n\n"):
            para = para.strip()
            if para:
                elements.append(Paragraph(markup(para), styles["CJKBody"]))

    # Tables
    for table_spec in data_assets.get("tables") or []:
        elements.append(Spacer(1, 12))
        elements.append(Paragraph(markup(table_spec.get("title", "")), styles["CJKHeading"]))
        headers = table_spec.get("headers") or []
        rows = table_spec.get("rows") or []
        if headers:
            n_cols = len(headers)
            # Pad/truncate every row to the header width so ragged rows from
            # generated data cannot break the table layout.
            body = [
                [str(c) for c in list(row)[:n_cols]] + [""] * (n_cols - len(row))
                for row in rows
            ]
            t = Table([[str(h) for h in headers]] + body)
            t.setStyle(TableStyle([
                ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#1a1a2e")),
                ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
                ("FONTNAME", (0, 0), (-1, -1), cjk_font),
                ("FONTSIZE", (0, 0), (-1, 0), 10),
                ("FONTSIZE", (0, 1), (-1, -1), 9),
                ("GRID", (0, 0), (-1, -1), 0.5, colors.grey),
                ("ALIGN", (0, 0), (-1, -1), "CENTER"),
                ("ROWBACKGROUNDS", (0, 1), (-1, -1), [colors.white, colors.HexColor("#f5f5f5")]),
            ]))
            elements.append(t)

    doc.build(elements)
    return path
|
||||
|
||||
async def _render_pdf_fpdf(
    self, draft: dict, data_assets: dict, output_dir: Path, title: str
) -> Path:
    """Fallback PDF generation with fpdf2.

    Writes the report title, the optional executive summary and one page
    per chapter to ``<output_dir>/<title>.pdf``.

    Args:
        draft: Report draft; the keys read here are ``executive_summary``
            and ``chapters`` (each chapter contributing ``title`` and
            ``content``).
        data_assets: Not used by this fallback; tables are not rendered.
        output_dir: Directory the PDF is written into.
        title: Report title, printed on the first page and used as the
            file name (path separators are replaced with ``_``).

    Returns:
        Path of the written PDF file.
    """
    import logging

    from fpdf import FPDF

    log = logging.getLogger(__name__)

    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)

    # Try the known CJK-capable system fonts in order; the first one that
    # loads wins. A missing or unloadable font is simply skipped.
    font_candidates = (
        "/System/Library/Fonts/STHeiti Medium.ttc",
        "/System/Library/Fonts/PingFang.ttc",
        "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
        "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc",
        "C:/Windows/Fonts/msyh.ttc",
        "C:/Windows/Fonts/simhei.ttf",
    )
    has_cjk_font = False
    for font_path in font_candidates:
        if not Path(font_path).exists():
            continue
        try:
            # ``uni=True`` is deprecated in fpdf2 and no longer needed.
            pdf.add_font("CJK", "", font_path)
            pdf.set_font("CJK", "", 11)
        except Exception:
            log.warning("Could not load CJK font %s", font_path, exc_info=True)
            continue
        has_cjk_font = True
        break
    if not has_cjk_font:
        pdf.set_font("Helvetica", "", 11)

    def printable(text: object) -> str:
        """Return text the active font can encode."""
        text = "" if text is None else str(text)
        if has_cjk_font:
            return text
        # Core fonts are latin-1 only: degrade unsupported characters to "?"
        # instead of letting the whole fallback render raise.
        return text.encode("latin-1", "replace").decode("latin-1")

    pdf.add_page()
    pdf.set_font_size(24)
    pdf.cell(0, 20, printable(title), new_x="LMARGIN", new_y="NEXT", align="C")

    pdf.set_font_size(11)
    if summary := draft.get("executive_summary"):
        # The Chinese heading is only printable with a CJK font loaded.
        summary_heading = "执行摘要" if has_cjk_font else "Executive Summary"
        pdf.set_font_size(16)
        pdf.cell(0, 12, summary_heading, new_x="LMARGIN", new_y="NEXT")
        pdf.set_font_size(11)
        pdf.multi_cell(0, 6, printable(summary))

    for chapter in draft.get("chapters", []):
        pdf.add_page()
        pdf.set_font_size(16)
        # A chapter without a title still gets its page instead of a KeyError.
        chapter_title = printable(chapter.get("title", ""))
        pdf.cell(0, 12, chapter_title, new_x="LMARGIN", new_y="NEXT")
        pdf.set_font_size(11)
        pdf.multi_cell(0, 6, printable(chapter.get("content", "")))

    # Path separators in the title would point outside output_dir or into a
    # directory that does not exist.
    safe_title = title.replace("/", "_").replace("\\", "_")
    path = output_dir / f"{safe_title}.pdf"
    pdf.output(str(path))
    return path
|
||||
103
app/agents/researcher.py
Normal file
103
app/agents/researcher.py
Normal file
@@ -0,0 +1,103 @@
|
||||
"""Researcher Agent — domain-aware, bilingual research."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from .base import BaseAgent
|
||||
from app.config import settings
|
||||
|
||||
# English system prompt for ResearcherAgent: sets the analyst persona, lists the
# research requirements and spells out the JSON structure the model should
# return. Selected when the agent's language is anything other than "zh".
SYSTEM_EN = """\
|
||||
You are a senior industry analyst at a top-tier consulting firm.
|
||||
Your task is to produce a thorough research brief based on the given instructions.
|
||||
|
||||
Requirements:
|
||||
1. Be specific — cite concrete data points, market sizes, growth rates, company names
|
||||
2. Be structured — organize findings with clear headings and logical flow
|
||||
3. Be analytical — don't just list facts, provide insights and implications
|
||||
4. Flag data gaps — explicitly note where data is uncertain or unavailable
|
||||
|
||||
Output (JSON):
|
||||
{
|
||||
"title": "Research brief title",
|
||||
"executive_summary": "2-3 sentence summary of key findings",
|
||||
"sections": [
|
||||
{
|
||||
"heading": "Section heading",
|
||||
"content": "Detailed findings (Markdown)",
|
||||
"data_points": ["key data points extracted"],
|
||||
"sources_quality": "high|medium|low — how confident are you in the data"
|
||||
}
|
||||
],
|
||||
"data_gaps": ["areas where data is insufficient or uncertain"],
|
||||
"key_insights": ["top 3-5 non-obvious insights"]
|
||||
}"""
|
||||
|
||||
# Chinese system prompt for ResearcherAgent, selected when the agent's language
# is "zh". It carries the same four requirements and the same JSON keys as
# SYSTEM_EN, written in Chinese.
SYSTEM_ZH = """\
|
||||
你是一位顶级咨询公司的资深行业分析师。
|
||||
你的任务是根据给定的指令,输出一份深度研究简报。
|
||||
|
||||
要求:
|
||||
1. 具体——引用具体的数据点、市场规模、增长率、企业名称
|
||||
2. 结构化——用清晰的标题和逻辑流组织发现
|
||||
3. 有分析深度——不要只罗列事实,要提供洞察和含义
|
||||
4. 标注数据缺口——明确指出数据不确定或不可获取的地方
|
||||
|
||||
输出(JSON):
|
||||
{
|
||||
"title": "研究简报标题",
|
||||
"executive_summary": "核心发现的2-3句总结",
|
||||
"sections": [
|
||||
{
|
||||
"heading": "章节标题",
|
||||
"content": "详细发现(Markdown格式)",
|
||||
"data_points": ["提取的关键数据点"],
|
||||
"sources_quality": "high|medium|low — 对数据的置信度"
|
||||
}
|
||||
],
|
||||
"data_gaps": ["数据不充分或不确定的领域"],
|
||||
"key_insights": ["3-5条非显而易见的洞察"]
|
||||
}"""
|
||||
|
||||
|
||||
class ResearcherAgent(BaseAgent):
    """Agent that asks the LLM for a structured research brief.

    The system prompt and the user prompt are written in Chinese when
    ``language`` is ``"zh"`` and in English for any other value.
    """

    name = "researcher"
    description = "域感知研究 — 根据领域选择最优模型和语言"

    def __init__(self, model: str | None = None, language: str = "en"):
        super().__init__(model=model)
        self.language = language
        if language == "zh":
            self.system_prompt = SYSTEM_ZH
        else:
            self.system_prompt = SYSTEM_EN

    async def run(self, context: dict[str, Any]) -> dict[str, Any]:
        """Build the research prompt from ``context`` and query the LLM.

        Reads ``requirement`` (required), ``report_type`` and ``extra_data``
        (both optional) from ``context`` and returns the parsed LLM reply
        under the ``"research"`` key.
        """
        requirement = context["requirement"]
        report_type = context.get("report_type", "")
        extra_data = context.get("extra_data", "")

        # Only the labels differ between the two languages; the layout of
        # the prompt is shared.
        if self.language == "zh":
            head_req, head_focus, head_extra = "## 研究指令", "## 研究方向", "## 补充数据"
            placeholder, closing = "(无)", "请输出研究简报 JSON。"
        else:
            head_req = "## Research instructions"
            head_focus = "## Research focus"
            head_extra = "## Additional data"
            placeholder, closing = "(none)", "Output the research brief as JSON."

        prompt = (
            f"{head_req}\n{requirement}\n\n"
            f"{head_focus}\n{report_type}\n\n"
            f"{head_extra}\n{extra_data or placeholder}\n\n"
            f"{closing}"
        )

        brief = await self.call_llm_json(prompt, max_tokens=6144)
        return {"research": brief}
|
||||
79
app/agents/reviewer.py
Normal file
79
app/agents/reviewer.py
Normal file
@@ -0,0 +1,79 @@
|
||||
"""Reviewer Agent — bilingual quality check with strongest reasoning model."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
from .base import BaseAgent
|
||||
from app.config import settings
|
||||
|
||||
|
||||
class ReviewerAgent(BaseAgent):
    """Agent that scores a report draft against the research it was built on.

    Runs on the model configured for the ``"reasoning"`` domain.
    """

    name = "reviewer"
    description = "双语报告质量审查 — 使用最强推理模型"

    system_prompt = """\
You are a senior consulting partner reviewing a report before client delivery.
The report has both Chinese and English versions (or will be translated).

Review dimensions:
1. **Accuracy** — Are data points, percentages, and claims supported by the research?
Cross-check global claims against English research, Chinese claims against Chinese research.
2. **Logical consistency** — Does the narrative flow? Are there contradictions between chapters?
3. **Depth of analysis** — Is it consultancy-grade or just surface-level? Would a C-suite exec find it valuable?
4. **Bilingual quality** — If translated version exists, check for translation artifacts,
mistranslated terminology, or cultural mismatches.
5. **Data gaps honesty** — Are uncertainties acknowledged or are claims fabricated?
6. **Completeness** — Are any critical aspects of the requirement left unaddressed?

Scoring guide:
- 90+: Publication-ready
- 80-89: Minor issues, can pass with notes
- 70-79: Needs revision (verdict: revise)
- <70: Significant problems (verdict: reject)

Output (JSON):
{
"overall_score": 85,
"verdict": "pass|revise|reject",
"issues": [
{
"severity": "high|medium|low",
"chapter": "affected chapter",
"dimension": "accuracy|consistency|depth|bilingual|gaps|completeness",
"description": "issue description",
"suggestion": "specific fix suggestion"
}
],
"strengths": ["what the report does well"],
"summary": "Overall assessment (2-3 sentences)"
}"""

    def __init__(self):
        super().__init__(model=settings.model_for_domain("reasoning"))

    async def run(self, context: dict[str, Any]) -> dict[str, Any]:
        """Assemble the review prompt and return the LLM verdict.

        Reads ``draft`` and ``research`` (required) and ``draft_translated``
        (optional) from ``context``; the parsed LLM reply is returned under
        the ``"review"`` key.
        """
        draft = context["draft"]
        translated = context.get("draft_translated", {})
        research = context["research"]

        def as_json(payload: Any) -> str:
            # Keep non-ASCII text readable in the prompt.
            return json.dumps(payload, ensure_ascii=False, indent=2)

        body = (
            "## Research Plan (what was asked)\n"
            f"{as_json(research)}\n"
            "\n"
            "## Primary Draft\n"
            f"{as_json(draft)}"
        )
        # The translated draft is appended only when one was supplied.
        if translated:
            body += f"\n\n## Translated Version\n{as_json(translated)}"

        prompt = f"{body}\n\nReview the report. Output JSON."

        verdict = await self.call_llm_json(prompt, max_tokens=4096)
        return {"review": verdict}
|
||||
86
app/agents/writer.py
Normal file
86
app/agents/writer.py
Normal file
@@ -0,0 +1,86 @@
|
||||
"""Writer Agent — synthesizes multilingual research tracks into a cohesive report."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
from .base import BaseAgent
|
||||
from app.config import settings
|
||||
|
||||
|
||||
class WriterAgent(BaseAgent):
    """Agent that merges multi-track research findings into one report draft.

    Runs on the model configured for the ``"reasoning"`` domain and asks for
    a Chinese-language report in the JSON layout given in ``system_prompt``.
    """

    name = "writer"
    description = "汇聚多语言/多领域研究成果,撰写完整报告"

    system_prompt = """\
You are an expert consulting report writer. Your task is to synthesize research
findings from MULTIPLE parallel tracks (some in English, some in Chinese) into
ONE cohesive, professional consulting report.

CRITICAL RULES:
1. The PRIMARY output language is Chinese (中文) — this is for Chinese clients
2. For global/international sections, the analysis depth must reflect the English research
3. For China-specific sections, preserve the precision of Chinese-native research
4. Maintain professional consulting tone throughout
5. Every claim should trace back to a research track's findings
6. Mark chart/table needs: {{CHART:描述}} and {{TABLE:描述}}
7. If a research track flags "data_gaps", acknowledge uncertainty rather than fabricating

Output (JSON):
{
"title": "报告标题(中文)",
"title_en": "Report Title (English)",
"chapters": [
{
"title": "章节标题",
"content": "章节正文(Markdown 格式,中文)",
"source_tracks": ["引用的研究轨道名称"],
"charts": ["图表需求"],
"tables": ["表格需求"]
}
],
"executive_summary": "执行摘要(中文,300-500字)",
"executive_summary_en": "Executive Summary (English, 200-400 words)"
}"""

    def __init__(self):
        super().__init__(model=settings.model_for_domain("reasoning"))

    @staticmethod
    def _format_tracks(tracks: list[dict[str, Any]]) -> str:
        """Render each research track as a tagged heading plus its findings JSON."""
        parts: list[str] = []
        for track in tracks:
            # A missing or null language must not crash on .upper().
            language = str(track.get("native_language") or "?").upper()
            domain = track.get("domain", "?")
            label = track.get("track", "")
            findings = json.dumps(
                track.get("findings", {}), ensure_ascii=False, indent=2
            )
            parts.append(f"\n### [{domain}] [{language}] {label}\n{findings}")
        return "".join(parts)

    async def run(self, context: dict[str, Any]) -> dict[str, Any]:
        """Build the writing prompt from ``context`` and query the LLM.

        Reads ``research`` and ``requirement`` (required) and
        ``revision_feedback`` (optional) from ``context``; the parsed LLM
        reply is returned under the ``"draft"`` key.
        """
        research = context["research"]
        requirement = context["requirement"]
        revision_feedback = context.get("revision_feedback", "")

        # Format multi-track, multilingual research
        tracks_text = self._format_tracks(research.get("tracks", []))
        synthesis_guide = research.get("synthesis_guide", "")
        title_zh = research.get("title_zh", "")
        title_en = research.get("title_en", "")

        # The heading needs its own line; built outside the template so the
        # newline does not have to sit inside an f-string expression.
        feedback_section = ""
        if revision_feedback:
            feedback_section = f"## 审稿反馈 / Review Feedback\n{revision_feedback}"

        prompt = f"""\
## 原始需求 / Original Requirement
{requirement}

## 报告标题
中文:{title_zh}
English: {title_en}

## 写作指导 / Synthesis Guide
{synthesis_guide}

## 各研究轨道成果 / Research Track Results
(注意:有些轨道是英文原版 [EN],有些是中文原版 [ZH],请综合使用)
{tracks_text}

{feedback_section}

请汇聚以上研究成果,撰写完整的中文报告。输出 JSON。"""

        result = await self.call_llm_json(prompt, max_tokens=8192)
        return {"draft": result}
|
||||
Reference in New Issue
Block a user