"""World Bank Open Data — global macro indicators, 217 economies, free API.

API: https://api.worldbank.org/v2/
"""

from __future__ import annotations

import logging
from typing import Any

import httpx

from .base import DataSource, DataResult

logger = logging.getLogger(__name__)

BASE_URL = "https://api.worldbank.org/v2"

# Common indicators for consulting reports
INDICATORS = {
    "gdp": "NY.GDP.MKTP.CD",                   # GDP (current US$)
    "gdp_growth": "NY.GDP.MKTP.KD.ZG",         # GDP growth (annual %)
    "gdp_per_capita": "NY.GDP.PCAP.CD",        # GDP per capita
    "population": "SP.POP.TOTL",               # Total population
    "inflation": "FP.CPI.TOTL.ZG",             # Inflation (CPI %)
    "trade_pct_gdp": "NE.TRD.GNFS.ZS",         # Trade (% of GDP)
    "fdi_net": "BX.KLT.DINV.CD.WD",            # FDI net inflows
    "unemployment": "SL.UEM.TOTL.ZS",          # Unemployment (%)
    "exports": "NE.EXP.GNFS.CD",               # Exports
    "imports": "NE.IMP.GNFS.CD",               # Imports
    "r_and_d": "GB.XPD.RSDV.GD.ZS",            # R&D expenditure (% GDP)
    "high_tech_exports": "TX.VAL.TECH.MF.ZS",  # High-tech exports (% manufactured)
}

# Search phrases derived from INDICATORS. Underscores become spaces so that
# natural-language queries ("gdp growth") match, and longer phrases are tried
# first so that a specific key such as "gdp per capita" is not shadowed by the
# shorter "gdp". The sort is stable, so equal-length phrases keep dict order.
_INDICATOR_PHRASES = tuple(
    sorted(
        ((key.replace("_", " "), code) for key, code in INDICATORS.items()),
        key=lambda phrase_and_code: len(phrase_and_code[0]),
        reverse=True,
    )
)


def _resolve_indicator(query: str) -> str:
    """Return the indicator code whose phrase occurs in *query* (GDP if none does)."""
    # Treat "_" and "-" as spaces and collapse whitespace so that "gdp_growth",
    # "GDP  growth" and "high-tech exports" all hit the same phrases.
    normalized = " ".join(query.lower().replace("_", " ").replace("-", " ").split())
    for phrase, code in _INDICATOR_PHRASES:
        if phrase in normalized:
            return code
    # Default to GDP
    return INDICATORS["gdp"]


class WorldBankSource(DataSource):
    """Data source that reads indicator time series from the World Bank API."""

    name = "worldbank"
    description = "World Bank Open Data — 1600+ indicators, 217 economies, free"

    def supports(self, data_type: str, country: str | None = None) -> bool:
        """Return True for the "macro" and "general" data types; *country* is ignored."""
        return data_type in ("macro", "general")

    async def fetch(
        self,
        query: str,
        *,
        data_type: str = "general",
        country: str | None = None,
        **kwargs: Any,
    ) -> DataResult:
        """Fetch one indicator time series for a country.

        Args:
            query: Free text searched for a known indicator keyword (see
                ``INDICATORS``); GDP is used when nothing matches.
            data_type: Accepted for interface compatibility; not used here.
            country: Country code placed in the request path. Defaults to
                "WLD" (World) when omitted or empty.
            **kwargs: ``indicator`` overrides keyword detection with an explicit
                indicator code; ``per_page`` sets the page size (default 20).

        Returns:
            A ``DataResult`` whose ``data`` holds the indicator code, the
            country code and the records that have a non-null value. On any
            failure a ``DataResult`` with ``error`` set is returned instead of
            raising.
        """
        indicator_code = kwargs.get("indicator") or _resolve_indicator(query)
        country_code = country or "WLD"  # WLD = World
        per_page = kwargs.get("per_page", 20)

        url = f"{BASE_URL}/country/{country_code}/indicator/{indicator_code}"
        params = {
            "format": "json",
            "per_page": per_page,
        }

        try:
            async with httpx.AsyncClient(timeout=15) as client:
                resp = await client.get(url, params=params)
                resp.raise_for_status()
                data = resp.json()

            # A usable payload is a two-element list: [paging metadata, records].
            if not isinstance(data, list) or len(data) < 2:
                return DataResult(source=self.name, data=None, error="No data returned")

            metadata_raw = data[0] or {}
            # NOTE(review): the records element appears to be null rather than
            # an empty list when the query has no observations — treat it as
            # empty instead of failing on iteration. Confirm against API docs.
            records = data[1] or []

            # Parse into clean format, dropping years with no reported value.
            clean_records = [
                {
                    "year": r["date"],
                    "value": r["value"],
                    "country": r["country"]["value"],
                    "indicator": r["indicator"]["value"],
                }
                for r in records
                if r.get("value") is not None
            ]

            return DataResult(
                source=self.name,
                data={
                    "indicator": indicator_code,
                    "country": country_code,
                    "records": clean_records,
                },
                metadata={
                    "total": metadata_raw.get("total", 0),
                    "indicator_name": clean_records[0]["indicator"] if clean_records else "",
                    "format": "timeseries",
                },
            )
        except Exception as e:
            # Boundary handler: callers receive an error result rather than an
            # exception, but the failure is still recorded in the log.
            logger.warning("World Bank API request failed for %s: %s", url, e)
            return DataResult(source=self.name, data=None, error=f"World Bank API failed: {e}")