init repo
This commit is contained in:
216
data_collector.py
Normal file
216
data_collector.py
Normal file
@@ -0,0 +1,216 @@
|
||||
"""
Data collection module - fetches stock and financial data from various APIs.
"""
|
||||
import yfinance as yf
|
||||
import pandas as pd
|
||||
import requests
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
import time
|
||||
from datetime import datetime, timedelta
|
||||
import logging
|
||||
|
||||
# Logging setup: create this module's logger and apply an INFO-level
# basic configuration to the root logger.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
class StockDataCollector:
    """Collect company, price, fundamentals, analyst and news data via yfinance.

    Every public ``get_*`` method is best-effort: any exception is logged and
    an empty result is returned instead of being propagated to the caller.
    """

    # (output key, ``Ticker.info`` key, default) for get_company_info.
    _COMPANY_FIELDS = (
        ('name', 'longName', ''),
        ('sector', 'sector', ''),
        ('industry', 'industry', ''),
        ('market_cap', 'marketCap', 0),
        ('employees', 'fullTimeEmployees', 0),
        ('website', 'website', ''),
        ('description', 'longBusinessSummary', ''),
        ('country', 'country', ''),
        ('currency', 'currency', 'USD'),
    )

    # (output key, ``Ticker.info`` key) for get_key_metrics; default is 0.
    _METRIC_FIELDS = (
        ('pe_ratio', 'trailingPE'),
        ('pb_ratio', 'priceToBook'),
        ('ps_ratio', 'priceToSalesTrailing12Months'),
        ('peg_ratio', 'pegRatio'),
        ('debt_to_equity', 'debtToEquity'),
        ('current_ratio', 'currentRatio'),
        ('quick_ratio', 'quickRatio'),
        ('return_on_equity', 'returnOnEquity'),
        ('return_on_assets', 'returnOnAssets'),
        ('profit_margin', 'profitMargins'),
        ('operating_margin', 'operatingMargins'),
        ('revenue_growth', 'revenueGrowth'),
        ('earnings_growth', 'earningsGrowth'),
        ('beta', 'beta'),
        ('dividend_yield', 'dividendYield'),
        ('payout_ratio', 'payoutRatio'),
    )

    # Price-history column labels mapped onto the database column names.
    _PRICE_RENAMES = {
        'Open': 'open',
        'High': 'high',
        'Low': 'low',
        'Close': 'close',
        'Volume': 'volume',
    }
    _PRICE_COLUMNS = ['date', 'open', 'high', 'low', 'close', 'volume']

    # (output key, candidate statement labels tried in order).
    # NOTE(review): the second 'total_liabilities' label is the name recent
    # yfinance releases appear to use - confirm against the installed version.
    _STATEMENT_FIELDS = (
        ('revenue', ('Total Revenue',)),
        ('net_income', ('Net Income',)),
        ('total_assets', ('Total Assets',)),
        ('total_liabilities', ('Total Liabilities',
                               'Total Liabilities Net Minority Interest')),
        ('shareholders_equity', ('Stockholders Equity',)),
        ('cash', ('Cash And Cash Equivalents',)),
        ('debt', ('Total Debt',)),
    )

    _POSITIVE_KEYWORDS = ('growth', 'profit', 'increase', 'strong',
                          'positive', 'beat', 'exceed')
    _NEGATIVE_KEYWORDS = ('loss', 'decline', 'weak', 'negative',
                          'miss', 'fall', 'drop')

    def __init__(self):
        """Create an HTTP session carrying a browser-like User-Agent header.

        No method of this class currently reads ``self.session``; it is kept
        so the public attribute remains available to callers.
        """
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        })

    # ------------------------------------------------------------------
    # Pure helpers (no network access, no logging)
    # ------------------------------------------------------------------
    @staticmethod
    def _info_value(info, key, default):
        """Return ``info[key]``, or ``default`` if it is missing or ``None``.

        ``dict.get(key, default)`` alone returns ``None`` when the key is
        present with a ``None`` value, which would leak ``None`` into results
        that are documented to carry a default.
        """
        value = info.get(key)
        return default if value is None else value

    @classmethod
    def _normalize_price_frame(cls, data: pd.DataFrame) -> pd.DataFrame:
        """Reshape a price-history frame to the database column layout.

        The index becomes a leading ``date`` column, known price columns are
        renamed to lower case, and the result is restricted to
        ``_PRICE_COLUMNS``. Columns are selected by name, so extra columns
        are dropped and a missing one shows up as NaN instead of raising.
        """
        frame = data.reset_index()
        # After reset_index() the former index is always the first column.
        renames = {frame.columns[0]: 'date', **cls._PRICE_RENAMES}
        return frame.rename(columns=renames).reindex(columns=cls._PRICE_COLUMNS)

    @staticmethod
    def _statement_value(column: pd.Series, labels):
        """Return the first non-missing value among ``labels``, else 0."""
        for label in labels:
            value = column.get(label)
            if value is not None and pd.notna(value):
                return value
        return 0

    @classmethod
    def _build_statement_records(cls, income, balance, quarterly: bool) -> Dict:
        """Build per-period records from income and balance-sheet frames.

        Both frames are expected to have line items as rows and one column
        per period-end timestamp. Either frame may be ``None`` or empty.

        Args:
            income: Income-statement frame (revenue, net income).
            balance: Balance-sheet frame (assets, liabilities, equity, ...).
            quarterly: When true, keys are ``Q<quarter>_<year>`` and
                ``quarter`` is the calendar quarter of the period end; when
                false, keys are ``Annual_<year>`` and ``quarter`` is 0.

        Returns:
            Mapping of period key to a dict of statement values. Values that
            are absent or NaN are reported as 0.
        """
        frames = [f for f in (income, balance) if f is not None and not f.empty]
        # Ordered, de-duplicated union of the period columns of all frames.
        periods = list(dict.fromkeys(col for f in frames for col in f.columns))

        records: Dict = {}
        for period in periods:
            column = pd.concat([f[period] for f in frames if period in f.columns])
            # Guard against a label present in both statements.
            column = column[~column.index.duplicated(keep='first')]

            if quarterly:
                quarter = (period.month - 1) // 3 + 1
                key = f"Q{quarter}_{period.year}"
            else:
                quarter = 0
                key = f"Annual_{period.year}"

            entry = {'year': period.year, 'quarter': quarter}
            for field, labels in cls._STATEMENT_FIELDS:
                entry[field] = cls._statement_value(column, labels)
            records[key] = entry
        return records

    @staticmethod
    def _summarize_recommendations(recommendations) -> Dict:
        """Summarise a recommendations frame using its last row.

        Returns an empty dict when the frame is ``None`` or empty. The date
        is read from a ``'Date'`` column when present; otherwise the row's
        index label is used unless the index is a plain positional
        ``RangeIndex``, in which case the date is ``''``.
        """
        if recommendations is None or recommendations.empty:
            return {}

        latest = recommendations.iloc[-1]
        latest_date = latest.get('Date')
        if latest_date is None:
            positional = isinstance(recommendations.index, pd.RangeIndex)
            latest_date = '' if positional else latest.name

        return {
            'latest_recommendation': latest.get('To Grade', ''),
            'latest_firm': latest.get('Firm', ''),
            'latest_date': latest_date,
            'total_recommendations': len(recommendations)
        }

    @staticmethod
    def _article_text(article) -> str:
        """Return the lower-cased ``title`` and ``summary`` of a news item.

        Fields are read from the top level first and then from a nested
        ``'content'`` dict, if one exists. Missing or ``None`` fields are
        treated as empty strings.
        """
        if not isinstance(article, dict):
            return ''
        nested = article.get('content')
        if not isinstance(nested, dict):
            nested = {}
        title = article.get('title') or nested.get('title') or ''
        summary = article.get('summary') or nested.get('summary') or ''
        return f"{title} {summary}".lower()

    @classmethod
    def _score_article(cls, article) -> int:
        """Return (#positive keywords - #negative keywords) found in an article.

        NOTE: matching is by substring, so e.g. 'mission' counts as 'miss'.
        This is a deliberately simple heuristic, not an NLP model.
        """
        text = cls._article_text(article)
        positive_count = sum(1 for word in cls._POSITIVE_KEYWORDS if word in text)
        negative_count = sum(1 for word in cls._NEGATIVE_KEYWORDS if word in text)
        return positive_count - negative_count

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def get_company_info(self, symbol: str) -> Dict:
        """Return basic company information for ``symbol``.

        Returns:
            A dict with ``symbol`` plus the ``_COMPANY_FIELDS`` keys, or an
            empty dict if the lookup raised.
        """
        try:
            info = yf.Ticker(symbol).info or {}
            company = {'symbol': symbol}
            for out_key, info_key, default in self._COMPANY_FIELDS:
                company[out_key] = self._info_value(info, info_key, default)
            return company
        except Exception as e:
            logger.error(f"获取公司信息失败 {symbol}: {e}")
            return {}

    def get_stock_prices(self, symbol: str, period: str = "1y") -> pd.DataFrame:
        """Return price history for ``symbol`` over ``period``.

        Returns:
            A frame with columns ``date, open, high, low, close, volume``, or
            an empty frame when no data is available or the lookup raised.
        """
        try:
            data = yf.Ticker(symbol).history(period=period)
            if data.empty:
                logger.warning(f"未找到股价数据: {symbol}")
                return pd.DataFrame()
            return self._normalize_price_frame(data)
        except Exception as e:
            logger.error(f"获取股价数据失败 {symbol}: {e}")
            return pd.DataFrame()

    def get_financial_statements(self, symbol: str) -> Dict:
        """Return quarterly and annual statement figures for ``symbol``.

        Quarterly and annual data are fetched independently, so a failure in
        one does not discard the other.

        Returns:
            A dict keyed ``Q<quarter>_<year>`` / ``Annual_<year>``; see
            ``_build_statement_records`` for the record layout. Empty if
            nothing could be fetched.
        """
        try:
            ticker = yf.Ticker(symbol)
            quarterly_data: Dict = {}
            annual_data: Dict = {}

            try:
                quarterly_data = self._build_statement_records(
                    ticker.quarterly_financials,
                    ticker.quarterly_balance_sheet,
                    True,
                )
            except Exception as e:
                logger.warning(f"获取季度财务数据失败 {symbol}: {e}")

            try:
                annual_data = self._build_statement_records(
                    ticker.financials,
                    ticker.balance_sheet,
                    False,
                )
            except Exception as e:
                logger.warning(f"获取年度财务数据失败 {symbol}: {e}")

            return {**quarterly_data, **annual_data}
        except Exception as e:
            logger.error(f"获取财务数据失败 {symbol}: {e}")
            return {}

    def get_key_metrics(self, symbol: str) -> Dict:
        """Return key valuation and profitability ratios for ``symbol``.

        Returns:
            A dict with the ``_METRIC_FIELDS`` keys (0 where a metric is
            missing or ``None``), or an empty dict if the lookup raised.
        """
        try:
            info = yf.Ticker(symbol).info or {}
            return {
                out_key: self._info_value(info, info_key, 0)
                for out_key, info_key in self._METRIC_FIELDS
            }
        except Exception as e:
            logger.error(f"获取关键指标失败 {symbol}: {e}")
            return {}

    def get_analyst_recommendations(self, symbol: str) -> Dict:
        """Return a summary of the latest analyst recommendation.

        Returns:
            A dict with ``latest_recommendation``, ``latest_firm``,
            ``latest_date`` and ``total_recommendations``; empty when there
            are no recommendations or the lookup raised.
        """
        try:
            recommendations = yf.Ticker(symbol).recommendations
            return self._summarize_recommendations(recommendations)
        except Exception as e:
            logger.warning(f"获取分析师推荐失败 {symbol}: {e}")
            return {}

    def get_news_sentiment(self, symbol: str) -> Dict:
        """Return a simple keyword-based sentiment score for recent news.

        Only the first 10 news items are scored; the first 5 are returned
        verbatim under ``recent_news``.

        Returns:
            A dict with ``sentiment_score``, ``news_count`` and
            ``recent_news``. When there is no news, or the lookup raised, the
            score and count are 0 and ``recent_news`` is an empty list.
        """
        empty = {'sentiment_score': 0, 'news_count': 0, 'recent_news': []}
        try:
            news = yf.Ticker(symbol).news
            if not news:
                return empty

            sentiment_score = sum(self._score_article(item) for item in news[:10])
            return {
                'sentiment_score': sentiment_score,
                'news_count': len(news),
                'recent_news': news[:5]
            }
        except Exception as e:
            logger.warning(f"获取新闻情绪失败 {symbol}: {e}")
            return empty

    def collect_all_data(self, symbol: str) -> Dict:
        """Run every collector for ``symbol`` and bundle the results.

        Returns:
            A dict with ``symbol``, ``collection_time`` (ISO-8601 string from
            the local clock) and one entry per ``get_*`` method.
        """
        logger.info(f"开始收集数据: {symbol}")

        all_data = {
            'symbol': symbol,
            'collection_time': datetime.now().isoformat(),
            'company_info': self.get_company_info(symbol),
            'stock_prices': self.get_stock_prices(symbol),
            'financial_statements': self.get_financial_statements(symbol),
            'key_metrics': self.get_key_metrics(symbol),
            'analyst_recommendations': self.get_analyst_recommendations(symbol),
            'news_sentiment': self.get_news_sentiment(symbol)
        }

        logger.info(f"数据收集完成: {symbol}")
        return all_data
|
||||
Reference in New Issue
Block a user