Files
20250715-66bfff96/代码实现/config.py
2026-04-25 19:21:03 +08:00

216 lines
5.7 KiB
Python

# -*- coding: utf-8 -*-
"""
搜索系统配置文件
"""
import os
from pathlib import Path
# Base paths.
# __file__ may be a relative path (for example a script started as
# `python config.py` on interpreters older than 3.9). pathlib's `.parent` is
# purely lexical, so Path("config.py").parent is "." and its own parent is
# "." as well -- EXPORT_DIR would then be created inside the code directory
# instead of next to it. Anchoring to an absolute path first avoids that.
# os.path.abspath is used (rather than Path.resolve) so symlinks are not
# followed and the directory layout seen by callers stays the same.
BASE_DIR = Path(os.path.abspath(__file__)).parent
DATA_DIR = BASE_DIR / "data"
EXPORT_DIR = BASE_DIR.parent / "新闻"
# Make sure both directories exist (this runs as an import-time side effect).
DATA_DIR.mkdir(exist_ok=True)
EXPORT_DIR.mkdir(exist_ok=True)
# Database settings.
# 'type' names the active backend; the original comment lists 'sqlite',
# 'mysql' and 'postgresql' as options.
# NOTE(review): no 'postgresql' section is defined here -- confirm whether
# the consumer of this dict supports that backend.
DATABASE_CONFIG = dict(
    type="sqlite",
    sqlite=dict(
        # SQLite database file inside DATA_DIR.
        path=DATA_DIR.joinpath("search_system.db"),
    ),
    mysql=dict(
        host="localhost",
        port=3306,
        user="root",
        password="",
        database="search_system",
    ),
)
# External API settings.
# Credentials are read from environment variables when this module is
# imported; an unset variable yields an empty string.
API_CONFIG = dict(
    newsapi=dict(
        key=os.environ.get("NEWSAPI_KEY", ""),
        base_url="https://newsapi.org/v2/",
        rate_limit=1000,  # request limit per day
    ),
    twitter=dict(
        bearer_token=os.environ.get("TWITTER_BEARER_TOKEN", ""),
        base_url="https://api.twitter.com/2/",
        rate_limit=300,  # request limit per 15 minutes
    ),
    alpha_vantage=dict(
        key=os.environ.get("ALPHA_VANTAGE_KEY", ""),
        base_url="https://www.alphavantage.co/query",
        rate_limit=5,  # request limit per minute
    ),
)
# RSS feed catalogue, grouped by topic category.
# Each row below is (name, url, authority_level, language); the comprehension
# expands every row into a dict with exactly those four keys, in that order.
# NOTE(review): authority_level presumably ranks source trustworthiness with
# 1 as the most authoritative -- confirm against the code that consumes it.
# NOTE(review): a few URLs (e.g. the FDA and WHO entries) look like feed
# index pages rather than feed documents -- verify they parse as RSS.
RSS_SOURCES = {
    category: [
        dict(zip(("name", "url", "authority_level", "language"), row))
        for row in rows
    ]
    for category, rows in (
        ("finance", (
            ("Federal Reserve", "https://www.federalreserve.gov/feeds/press_all.xml", 1, "en"),
            ("SEC", "https://www.sec.gov/rss/news/press-release.xml", 1, "en"),
            ("Bloomberg Markets", "https://feeds.bloomberg.com/markets/news.rss", 2, "en"),
            ("Reuters Finance", "https://feeds.reuters.com/reuters/businessNews", 2, "en"),
            ("Financial Times", "https://www.ft.com/rss/home", 2, "en"),
            ("Wall Street Journal", "https://feeds.a.dj.com/rss/RSSMarketsMain.xml", 2, "en"),
        )),
        ("ai_software", (
            ("arXiv Computer Science", "http://rss.arxiv.org/rss/cs", 1, "en"),
            ("Google AI Blog", "https://ai.googleblog.com/feeds/posts/default", 1, "en"),
            ("OpenAI Blog", "https://openai.com/blog/rss.xml", 1, "en"),
            ("MIT Technology Review", "https://www.technologyreview.com/feed/", 2, "en"),
            ("TechCrunch", "https://techcrunch.com/feed/", 2, "en"),
            ("The Verge", "https://www.theverge.com/rss/index.xml", 2, "en"),
        )),
        ("manufacturing", (
            ("ISO News", "https://www.iso.org/rss/news.xml", 1, "en"),
            ("IEEE Spectrum", "https://spectrum.ieee.org/rss/fulltext", 1, "en"),
            ("Industry Week", "https://www.industryweek.com/rss.xml", 2, "en"),
            ("Manufacturing.net", "https://www.manufacturing.net/rss.xml", 3, "en"),
        )),
        ("healthcare_pharma", (
            ("FDA News", "https://www.fda.gov/about-fda/contact-fda/stay-informed/rss-feeds", 1, "en"),
            ("NIH News", "https://www.nih.gov/news-events/rss", 1, "en"),
            ("WHO News", "https://www.who.int/rss-feeds", 1, "en"),
            ("STAT News", "https://www.statnews.com/feed/", 2, "en"),
        )),
    )
}
# Search settings.
# NOTE(review): search_timeout is presumably in seconds, and
# keywords_for_china presumably flags China-related queries -- confirm
# against the search code that reads these values.
SEARCH_CONFIG = dict(
    max_results_per_source=50,
    search_timeout=30,
    min_relevance_score=0.3,
    default_language="en",
    keywords_for_china=["中国", "国内", "A股", "人民币", "央行", "国务院"],
)
# Document export settings.
# template_path points at the 'templates' directory under BASE_DIR.
EXPORT_CONFIG = dict(
    default_format="docx",
    template_path=BASE_DIR.joinpath("templates"),
    max_articles_per_doc=20,
    include_source_links=True,
)
# Logging settings.
# The log file lives inside DATA_DIR; max_size is 10 MB expressed in bytes.
# NOTE(review): max_size / backup_count look like rotating-file-handler
# parameters -- confirm how the logging setup consumes them.
LOGGING_CONFIG = dict(
    level="INFO",
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    file=DATA_DIR.joinpath("search_system.log"),
    max_size=10 * 1024 ** 2,  # 10MB
    backup_count=5,
)
# RSS monitoring settings.
# NOTE(review): timeout is presumably in seconds -- confirm against the
# fetcher that reads it.
RSS_MONITOR_CONFIG = dict(
    check_interval=60 * 60,  # check once per hour
    max_retries=3,
    timeout=30,
    user_agent="SearchSystem/1.0 (RSS Monitor)",
)