init repo
This commit is contained in:
0
app/data/sources/__init__.py
Normal file
0
app/data/sources/__init__.py
Normal file
94
app/data/sources/akshare_source.py
Normal file
94
app/data/sources/akshare_source.py
Normal file
@@ -0,0 +1,94 @@
|
||||
"""AKShare data source — Chinese macro/industry data via open-source Python library.
|
||||
|
||||
Covers: GDP, CPI, PMI, industrial profit, trade balance, and 30+ data categories.
|
||||
All data returned as Pandas DataFrames, converted to dicts for standardization.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from .base import DataSource, DataResult
|
||||
|
||||
logger = logging.getLogger(__name__)

# Lookup table: keyword that may appear in a query -> name of the AKShare
# function that serves it.
AKSHARE_ENDPOINTS: dict[str, str] = {
    # --- China macro ---
    "gdp": "macro_china_gdp",
    "cpi": "macro_china_cpi_monthly",
    "ppi": "macro_china_ppi",
    "pmi": "macro_china_pmi",
    "industrial_profit": "macro_china_industrial_profit",
    "trade_balance": "macro_china_trade_balance",
    "money_supply": "macro_china_money_supply",
    "fdi": "macro_china_fdi",
    "real_estate": "macro_china_real_estate",
    "retail_sales": "macro_china_consumer_goods_retail",
    "fixed_asset": "macro_china_fai",
    "unemployment": "macro_china_urban_unemployment",
    # --- US macro ---
    "us_gdp": "macro_usa_gdp_monthly",
    "us_cpi": "macro_usa_cpi_monthly",
    "us_unemployment": "macro_usa_unemployment_rate",
    # --- Global ---
    "global_gdp": "macro_global_gdp",
}
|
||||
|
||||
|
||||
class AKShareSource(DataSource):
    """Data source that serves tabular data through the AKShare library.

    A query is resolved to an AKShare function name (either given explicitly
    via the ``endpoint`` keyword argument or matched against
    ``AKSHARE_ENDPOINTS``), the function is called without arguments, and the
    tail of the returned DataFrame is converted to plain dicts.
    """

    name = "akshare"
    description = "中国宏观经济/行业数据(免费开源,封装统计局等30+数据源)"

    def supports(self, data_type: str, country: str | None = None) -> bool:
        """Return True for the ``macro``, ``industry`` and ``general`` data types."""
        return data_type in ("macro", "industry", "general")

    @staticmethod
    def _match_endpoint(query: str) -> str | None:
        """Return the AKShare function name for the best keyword found in *query*.

        The longest matching key wins, so a specific key such as ``us_gdp`` or
        ``global_gdp`` is not shadowed by the shorter ``gdp`` it contains.
        Returns None when no key occurs in the query.
        """
        query_lower = query.lower()
        matches = [key for key in AKSHARE_ENDPOINTS if key in query_lower]
        if not matches:
            return None
        return AKSHARE_ENDPOINTS[max(matches, key=len)]

    async def fetch(
        self, query: str, *, data_type: str = "general", country: str | None = None, **kwargs,
    ) -> DataResult:
        """Fetch recent rows from the AKShare endpoint matching *query*.

        Args:
            query: Free text; matched case-insensitively against the keys of
                ``AKSHARE_ENDPOINTS`` unless ``endpoint`` is supplied.
            data_type: Accepted for interface compatibility; not used here.
            country: Accepted for interface compatibility; not used here.
            **kwargs: ``endpoint`` (explicit AKShare function name) and
                ``limit`` (number of trailing rows to return, default 20).

        Returns:
            A ``DataResult`` whose ``data`` holds ``columns``, ``records``,
            ``total_rows`` and ``returned_rows``; on any failure ``error`` is
            set instead. This method does not raise for AKShare failures.
        """
        # Local import so the threading helper is in scope without touching
        # the module's import block.
        import asyncio

        try:
            import akshare as ak
        except ImportError:
            return DataResult(source=self.name, error="akshare not installed (pip install akshare)")

        # An explicit endpoint takes precedence over keyword matching.
        endpoint_name = kwargs.get("endpoint") or self._match_endpoint(query)
        if not endpoint_name:
            return DataResult(source=self.name, data=None, error=f"No matching AKShare endpoint for: {query}")

        try:
            func = getattr(ak, endpoint_name, None)
            if not callable(func):
                return DataResult(source=self.name, error=f"AKShare function not found: {endpoint_name}")

            logger.info("[akshare] calling ak.%s()", endpoint_name)
            # The AKShare call is synchronous; run it in a worker thread so
            # the event loop is not blocked while it executes.
            df = await asyncio.to_thread(func)

            # Keep only the last N rows (most recent data) and convert to
            # plain dicts for serialization.
            limit = kwargs.get("limit", 20)
            recent = df.tail(limit)

            return DataResult(
                source=self.name,
                data={
                    "columns": list(recent.columns),
                    "records": recent.to_dict(orient="records"),
                    "total_rows": len(df),
                    "returned_rows": len(recent),
                },
                metadata={
                    "endpoint": endpoint_name,
                    "description": f"AKShare {endpoint_name}",
                    "format": "tabular",
                },
            )
        except Exception as e:
            # Boundary of the data-source contract: report the failure in the
            # result rather than propagating it to the caller.
            return DataResult(source=self.name, error=f"AKShare call failed: {e}")
|
||||
34
app/data/sources/base.py
Normal file
34
app/data/sources/base.py
Normal file
@@ -0,0 +1,34 @@
|
||||
"""Base class for data sources."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class DataResult(BaseModel):
    """Uniform envelope returned by every data source.

    Exactly what goes into ``data`` and ``metadata`` is decided by the
    individual source; this model only fixes the outer shape.
    """

    # Name of the source that produced this result.
    source: str = ""
    # Source-specific payload; left as None when nothing was produced.
    data: Any = None
    # Free-form details about the payload, e.g. unit, time_range,
    # update_date, confidence.
    metadata: dict[str, Any] = Field(default_factory=dict)
    # Human-readable failure description; None when no error was reported.
    error: str | None = None
    # Flag for results served from a cache; defaults to False.
    cached: bool = False
|
||||
|
||||
|
||||
class DataSource(ABC):
    """Interface shared by all data sources.

    Subclasses set ``name`` / ``description``, may narrow ``supports`` and
    must implement the asynchronous ``fetch``.
    """

    name: str = "base"
    description: str = ""

    def supports(self, data_type: str, country: str | None = None) -> bool:
        """Tell whether this source can handle *data_type* for *country*.

        The base implementation accepts every combination.
        """
        return True

    @abstractmethod
    async def fetch(
        self,
        query: str,
        *,
        data_type: str = "general",
        country: str | None = None,
        **kwargs,
    ) -> DataResult:
        """Retrieve data for *query* and return it as a ``DataResult``.

        Source-specific options are passed through ``**kwargs``.
        """
|
||||
61
app/data/sources/gpt_researcher_source.py
Normal file
61
app/data/sources/gpt_researcher_source.py
Normal file
@@ -0,0 +1,61 @@
|
||||
"""GPT Researcher MCP — deep web research as fallback for any industry.
|
||||
|
||||
This is the universal fallback: when structured data sources don't have
|
||||
data for a niche/cold industry, deep web research fills the gap.
|
||||
|
||||
Requires GPT Researcher MCP server to be running (already configured in ~/.claude.json).
|
||||
Direct invocation of the MCP tools (e.g. via a subprocess) is not implemented yet; fetch() currently returns a placeholder research request.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import subprocess
|
||||
from typing import Any
|
||||
|
||||
from .base import DataSource, DataResult
|
||||
|
||||
logger = logging.getLogger(__name__)


class GPTResearcherSource(DataSource):
    """Catch-all source that packages a query as a GPT Researcher request.

    No research is executed here: ``fetch`` returns a structured placeholder
    that an agent can hand to the GPT Researcher MCP tools.
    """

    name = "gpt_researcher"
    description = "Deep web research — universal fallback for any industry/topic"

    def supports(self, data_type: str, country: str | None = None) -> bool:
        """Accept every data type and country (universal fallback)."""
        return True

    async def fetch(
        self, query: str, *, data_type: str = "general", country: str | None = None, **kwargs,
    ) -> DataResult:
        """Build a research-request placeholder for *query*.

        Args:
            query: Research question, echoed back in the payload.
            data_type: Echoed into the result metadata.
            country: Echoed into the result metadata.
            **kwargs: ``mode`` selects ``"quick"`` (default) or ``"deep"``.

        Returns:
            A ``DataResult`` whose ``data`` carries the query, the mode, a
            ``"ready"`` status and a usage note.
        """
        research_mode = kwargs.get("mode", "quick")

        # Placeholder only: the actual MCP call is left to the agent layer
        # once this source is wired into the pipeline.
        usage_note = (
            "GPT Researcher MCP is available for deep web research. "
            "Call via MCP tools: deep_research() or quick_search(). "
            "This source returns research-ready queries for MCP integration."
        )
        payload = {
            "query": query,
            "mode": research_mode,
            "status": "ready",
            "note": usage_note,
        }
        request_info = {
            "type": "mcp_research_request",
            "mode": research_mode,
            "data_type": data_type,
            "country": country,
        }
        return DataResult(source=self.name, data=payload, metadata=request_info)
|
||||
104
app/data/sources/worldbank_source.py
Normal file
104
app/data/sources/worldbank_source.py
Normal file
@@ -0,0 +1,104 @@
|
||||
"""World Bank Open Data — global macro indicators, 217 economies, free API.
|
||||
|
||||
API: https://api.worldbank.org/v2/
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
||||
from .base import DataSource, DataResult
|
||||
|
||||
logger = logging.getLogger(__name__)

# Root of the World Bank v2 REST API.
BASE_URL = "https://api.worldbank.org/v2"

# Query keyword -> World Bank indicator code, for the indicators most often
# needed in consulting reports.
INDICATORS: dict[str, str] = {
    # GDP (current US$)
    "gdp": "NY.GDP.MKTP.CD",
    # GDP growth (annual %)
    "gdp_growth": "NY.GDP.MKTP.KD.ZG",
    # GDP per capita
    "gdp_per_capita": "NY.GDP.PCAP.CD",
    # Total population
    "population": "SP.POP.TOTL",
    # Inflation (CPI %)
    "inflation": "FP.CPI.TOTL.ZG",
    # Trade (% of GDP)
    "trade_pct_gdp": "NE.TRD.GNFS.ZS",
    # FDI net inflows
    "fdi_net": "BX.KLT.DINV.CD.WD",
    # Unemployment (%)
    "unemployment": "SL.UEM.TOTL.ZS",
    # Exports
    "exports": "NE.EXP.GNFS.CD",
    # Imports
    "imports": "NE.IMP.GNFS.CD",
    # R&D expenditure (% GDP)
    "r_and_d": "GB.XPD.RSDV.GD.ZS",
    # High-tech exports (% manufactured)
    "high_tech_exports": "TX.VAL.TECH.MF.ZS",
}
|
||||
|
||||
|
||||
class WorldBankSource(DataSource):
    """Data source that reads indicator time series from the World Bank API.

    A query is resolved to an indicator code (either given explicitly via the
    ``indicator`` keyword argument or matched against ``INDICATORS``), then
    ``{BASE_URL}/country/{country}/indicator/{code}`` is requested as JSON.
    """

    name = "worldbank"
    description = "World Bank Open Data — 1600+ indicators, 217 economies, free"

    def supports(self, data_type: str, country: str | None = None) -> bool:
        """Return True for the ``macro`` and ``general`` data types."""
        return data_type in ("macro", "general")

    @staticmethod
    def _match_indicator(query: str) -> str | None:
        """Return the indicator code for the best keyword found in *query*.

        The longest matching key wins, so specific keys such as
        ``gdp_growth``, ``gdp_per_capita`` or ``high_tech_exports`` are not
        shadowed by the shorter ``gdp`` / ``exports`` they contain.
        Returns None when no key occurs in the query.
        """
        query_lower = query.lower()
        matches = [key for key in INDICATORS if key in query_lower]
        if not matches:
            return None
        return INDICATORS[max(matches, key=len)]

    async def fetch(
        self, query: str, *, data_type: str = "general", country: str | None = None, **kwargs,
    ) -> DataResult:
        """Fetch an indicator time series for *country*.

        Args:
            query: Free text; matched case-insensitively against the keys of
                ``INDICATORS`` unless ``indicator`` is supplied. Falls back
                to the GDP indicator when nothing matches.
            data_type: Accepted for interface compatibility; not used here.
            country: Country code placed in the URL; defaults to ``"WLD"``
                (World).
            **kwargs: ``indicator`` (explicit indicator code) and
                ``per_page`` (page size sent to the API, default 20).

        Returns:
            A ``DataResult`` whose ``data`` holds ``indicator``, ``country``
            and ``records`` (entries with a null value are dropped); on any
            failure ``error`` is set instead. This method does not raise for
            HTTP or parsing failures.
        """
        # Explicit indicator first, then keyword match, then default to GDP.
        indicator_code = (
            kwargs.get("indicator")
            or self._match_indicator(query)
            or INDICATORS["gdp"]
        )

        country_code = country or "WLD"  # WLD = World
        per_page = kwargs.get("per_page", 20)

        url = f"{BASE_URL}/country/{country_code}/indicator/{indicator_code}"
        params = {
            "format": "json",
            "per_page": per_page,
        }

        try:
            async with httpx.AsyncClient(timeout=15) as client:
                resp = await client.get(url, params=params)
                resp.raise_for_status()
                data = resp.json()

            # The code below expects a two-element payload: [paging, records].
            if not data or len(data) < 2:
                return DataResult(source=self.name, data=None, error="No data returned")

            metadata_raw = data[0]
            records = data[1]

            # A null records element means there is nothing to iterate;
            # report it like the other no-data case instead of letting the
            # loop fail with a TypeError.
            if records is None:
                return DataResult(source=self.name, data=None, error="No data returned")

            # Keep only observations that actually carry a value.
            clean_records = [
                {
                    "year": r["date"],
                    "value": r["value"],
                    "country": r["country"]["value"],
                    "indicator": r["indicator"]["value"],
                }
                for r in records
                if r.get("value") is not None
            ]

            return DataResult(
                source=self.name,
                data={
                    "indicator": indicator_code,
                    "country": country_code,
                    "records": clean_records,
                },
                metadata={
                    "total": metadata_raw.get("total", 0),
                    "indicator_name": clean_records[0]["indicator"] if clean_records else "",
                    "format": "timeseries",
                },
            )
        except Exception as e:
            # Boundary of the data-source contract: report the failure in the
            # result rather than propagating it to the caller.
            return DataResult(source=self.name, error=f"World Bank API failed: {e}")
|
||||
Reference in New Issue
Block a user