# Files
# 2026-04-25 19:21:03 +08:00
#
# 367 lines
# 14 KiB
# Python
# Raw Permalink Blame History
#
# This file contains ambiguous Unicode characters
# This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""
搜索系统主程序
提供命令行界面和简单的Web界面
"""
import os
import sys
import logging
import argparse
from typing import Dict, List
from pathlib import Path
# 添加当前目录到Python路径
sys.path.append(str(Path(__file__).parent))
from config import LOGGING_CONFIG
from database import DatabaseManager
from search_engine import SearchEngine
from document_exporter import DocumentExporter
from rss_monitor import RSSMonitor
class SearchSystemCLI:
    """Command-line front end of the search system.

    Builds the database manager, search engine, document exporter and RSS
    monitor, and offers both one-shot searches and an interactive prompt.
    """

    def __init__(self):
        """Configure logging, then create the collaborating components."""
        self.setup_logging()
        self.db = DatabaseManager()
        self.search_engine = SearchEngine()
        self.exporter = DocumentExporter()
        self.rss_monitor = RSSMonitor()
        self.logger = logging.getLogger(__name__)

    def setup_logging(self):
        """Configure root logging from ``LOGGING_CONFIG`` (file + console)."""
        logging.basicConfig(
            level=LOGGING_CONFIG['level'],
            format=LOGGING_CONFIG['format'],
            handlers=[
                logging.FileHandler(LOGGING_CONFIG['file'], encoding='utf-8'),
                logging.StreamHandler(),
            ],
        )

    def run_search(self, query: str, industry: str = None,
                   language: str = None, export: bool = False) -> Dict:
        """Run one search, print its results and optionally export them.

        Args:
            query: Search text passed to the search engine.
            industry: Optional industry filter; ``None`` is shown as "all".
            language: Optional language; ``None`` is shown as auto-detect.
            export: When true and results exist, export them through the
                document exporter using the result's ``search_log_id``.

        Returns:
            The result dictionary returned by the search engine.
        """
        print(f"\n🔍 搜索查询: {query}")
        print(f"📊 行业: {industry or '全部'}")
        print(f"🌐 语言: {language or '自动检测'}")
        print("-" * 50)

        result = self.search_engine.search(
            query=query,
            industry=industry,
            language=language,
        )
        if not result['success']:
            print(f"❌ 搜索失败: {result.get('error', '未知错误')}")
            return result

        self.display_search_results(result)

        # Export only when requested and there is something to export.
        if export and result['results']:
            export_result = self.exporter.export_search_results(result['search_log_id'])
            if export_result['success']:
                print(f"\n📄 文档导出成功: {export_result['filename']}")
                print(f"📁 文件路径: {export_result['file_path']}")
            else:
                print(f"❌ 文档导出失败: {export_result.get('error', '未知错误')}")
        return result

    def display_search_results(self, result: Dict):
        """Print a summary line block and a preview of the first five results.

        Args:
            result: Successful search result containing ``total_count``,
                ``search_time``, ``sources_searched`` and ``results``.
        """
        print(f"\n✅ 搜索完成!")
        print(f"📈 找到 {result['total_count']} 条结果")
        print(f"⏱️ 搜索耗时: {result['search_time']}")
        print(f"🔗 检索源: {result['sources_searched']['total_sources']}")

        articles = result['results']
        if not articles:
            print("\n📭 没有找到相关结果")
            return

        print(f"\n📰 搜索结果预览 (前5条):")
        print("=" * 80)
        for i, article in enumerate(articles[:5], 1):
            print(f"\n{i}. {article['title']}")
            print(f" 🏢 来源: {article['source_name']} ({self.get_authority_text(article['authority_level'])})")
            print(f" 📅 时间: {self.format_date(article.get('published_date', ''))}")
            # `or 0` also covers a key that is present but None.
            print(f" 🎯 相关性: {article.get('final_score') or 0:.2f}")
            print(f" 🔗 链接: {article['original_url']}")
            # Fall back to the content when the summary is missing, None or empty.
            summary = article.get('summary') or article.get('content') or ''
            if summary:
                summary = summary[:100] + '...' if len(summary) > 100 else summary
                print(f" 📝 摘要: {summary}")

        if len(articles) > 5:
            print(f"\n... 还有 {len(articles) - 5} 条结果")

    def get_authority_text(self, level: int) -> str:
        """Return the display label for an authority level (1-4).

        Unknown levels map to the same label as level 4.
        """
        authority_map = {1: '官方机构', 2: '主流媒体', 3: '专业平台', 4: '其他'}
        return authority_map.get(level, '其他')

    def format_date(self, date_str: str) -> str:
        """Format a date value as ``YYYY-MM-DD``.

        Args:
            date_str: ISO-8601 string (a ``Z`` marker is stripped before
                parsing) or an object providing ``strftime``.

        Returns:
            The formatted date, a fixed "unknown" label for falsy input, or
            ``str(date_str)`` when the value cannot be parsed or formatted.
        """
        if not date_str:
            return '未知'
        from datetime import datetime
        try:
            if isinstance(date_str, str):
                date_obj = datetime.fromisoformat(date_str.replace('Z', ''))
            else:
                date_obj = date_str
            return date_obj.strftime('%Y-%m-%d')
        except (ValueError, TypeError, AttributeError):
            # Unparseable text or an object without strftime: show it raw.
            return str(date_str)

    def show_statistics(self):
        """Print system statistics obtained from the database manager."""
        stats = self.db.get_statistics()
        print("\n📊 系统统计信息")
        print("=" * 40)
        print(f"📰 文章总数: {stats['total_articles']}")
        print(f"🆕 今日新增: {stats['today_articles']}")
        print(f"🔍 搜索总次数: {stats['total_searches']}")
        print(f"📡 活跃源数: {stats['active_sources']}")
        print(f"\n📈 各行业文章分布:")
        # Only the first eight industry rows are listed.
        for item in stats['articles_by_industry'][:8]:
            print(f" {item['name_cn']}: {item['count']}")

    def show_search_history(self, limit: int = 10):
        """Print the most recent search records.

        Args:
            limit: Maximum number of records requested from the database.
        """
        history = self.db.get_search_history(limit)
        print(f"\n📜 最近 {limit} 次搜索记录")
        print("=" * 60)
        for i, record in enumerate(history, 1):
            print(f"{i}. {record['keywords']}")
            # `or` covers both a missing key and a stored None value.
            print(f" 行业: {record.get('industry_name') or '全部'} | "
                  f"结果: {record['results_count']} 条 | "
                  f"时间: {self.format_date(record['search_time'])}")

    def interactive_mode(self):
        """Run the interactive prompt until the user quits.

        Recognised commands are ``help``, ``stats``, ``history``,
        ``search <query>`` and ``quit``/``exit``/``q`` (case-insensitive);
        any other non-empty input is treated as a search query. Ctrl-C and
        end-of-input both leave the loop.
        """
        print("🚀 欢迎使用智能搜索系统!")
        print("输入 'help' 查看帮助，输入 'quit' 退出")
        while True:
            try:
                command = input("\n>>> ").strip()
                lowered = command.lower()
                if lowered in ['quit', 'exit', 'q']:
                    print("👋 再见!")
                    break
                elif lowered == 'help':
                    self.show_help()
                elif lowered == 'stats':
                    self.show_statistics()
                elif lowered == 'history':
                    self.show_search_history()
                elif lowered.startswith('search '):
                    # Drop the 7-character "search " prefix, keep the query's case.
                    query = command[7:].strip()
                    self.run_search(query, export=True)
                elif command:
                    # Anything else is searched directly.
                    self.run_search(command, export=True)
                else:
                    print("请输入搜索查询或命令")
            except (KeyboardInterrupt, EOFError):
                # EOFError must exit too: once stdin is exhausted every
                # further input() call raises again, which would spin forever.
                print("\n👋 再见!")
                break
            except Exception as e:
                print(f"❌ 错误: {e}")

    def show_help(self):
        """Print the command reference for interactive mode."""
        help_text = """
🆘 命令帮助:
search <查询词> - 执行搜索
stats - 查看统计信息
history - 查看搜索历史
help - 显示此帮助
quit/exit/q - 退出程序
🔍 搜索示例:
search AI breakthrough 2024
search 英伟达最新财报
search renewable energy policy
💡 提示:
- 英文搜索会自动使用英文信源
- 包含中文关键词会自动切换中文搜索
- 搜索结果会自动导出为DOCX文档
"""
        print(help_text)
# Page template for the web interface (rendered with Flask's
# render_template_string). The summary expression tolerates a missing or
# None summary instead of failing while rendering.
_WEB_PAGE_TEMPLATE = """
<!DOCTYPE html>
<html>
<head>
<title>智能搜索系统</title>
<meta charset="utf-8">
<style>
body { font-family: Arial, sans-serif; margin: 40px; }
.header { text-align: center; margin-bottom: 30px; }
.search-box { text-align: center; margin-bottom: 30px; }
input[type="text"] { padding: 10px; width: 400px; font-size: 16px; }
button { padding: 10px 20px; font-size: 16px; margin-left: 10px; }
.results { margin-top: 30px; }
.result-item { border: 1px solid #ddd; margin: 10px 0; padding: 15px; }
.result-title { font-weight: bold; color: #2c5aa0; }
.result-meta { color: #666; font-size: 14px; margin: 5px 0; }
.result-summary { margin: 10px 0; }
.stats { background: #f5f5f5; padding: 15px; margin: 20px 0; }
</style>
</head>
<body>
<div class="header">
<h1>🔍 智能搜索系统</h1>
<p>支持8个行业的权威信息搜索</p>
</div>
<div class="search-box">
<form method="POST">
<input type="text" name="query" placeholder="输入搜索关键词..." value="{{ query or '' }}">
<select name="industry">
<option value="">全部行业</option>
<option value="finance">金融</option>
<option value="ai_software">AI/软件</option>
<option value="manufacturing">制造业</option>
<option value="healthcare_pharma">医疗制药</option>
<option value="fmcg">快消品</option>
<option value="ecommerce_retail">零售电商</option>
<option value="energy_chemical">能源化工</option>
<option value="real_estate">房地产建筑</option>
</select>
<button type="submit">搜索</button>
</form>
</div>
{% if search_result %}
<div class="stats">
<strong>搜索结果:</strong> {{ search_result.total_count }} 条 |
<strong>耗时:</strong> {{ search_result.search_time }} 秒 |
<strong>信源:</strong> {{ search_result.sources_searched.total_sources }} 个
</div>
<div class="results">
{% for article in search_result.results[:10] %}
<div class="result-item">
<div class="result-title">{{ loop.index }}. {{ article.title }}</div>
<div class="result-meta">
📰 {{ article.source_name }} |
📅 {{ article.published_date or '未知时间' }} |
🎯 相关性: {{ "%.2f"|format(article.final_score or 0) }}
</div>
<div class="result-summary">{{ (article.summary or '')[:200] }}...</div>
<div><a href="{{ article.original_url }}" target="_blank">🔗 查看原文</a></div>
</div>
{% endfor %}
</div>
{% endif %}
{% if error %}
<div style="color: red; text-align: center;">
{{ error }}
</div>
{% endif %}
</body>
</html>
"""


def create_web_app():
    """Create the simple Flask web interface.

    Returns:
        The configured Flask application, or ``None`` (after printing an
        installation hint) when Flask cannot be imported.
    """
    # Guard only the Flask import: an ImportError raised anywhere else
    # (e.g. while building SearchSystemCLI) must not be reported as
    # "Flask is not installed".
    try:
        from flask import Flask, render_template_string, request
    except ImportError:
        print("Flask未安装无法启动Web界面")
        print("请运行: pip install flask")
        return None

    app = Flask(__name__)
    cli = SearchSystemCLI()

    @app.route('/', methods=['GET', 'POST'])
    def index():
        """Render the search form and, on POST, the search outcome."""
        if request.method != 'POST':
            return render_template_string(_WEB_PAGE_TEMPLATE)

        query = request.form.get('query', '').strip()
        # An empty selection means "all industries".
        industry = request.form.get('industry', '') or None
        if not query:
            return render_template_string(_WEB_PAGE_TEMPLATE,
                                          query=query,
                                          error='请输入搜索关键词')
        try:
            result = cli.search_engine.search(query, industry)
        except Exception as e:
            # Keep the traceback in the log; the page shows only the message.
            cli.logger.exception("Web search failed for query %r", query)
            return render_template_string(_WEB_PAGE_TEMPLATE,
                                          query=query,
                                          error=str(e))
        if result['success']:
            return render_template_string(_WEB_PAGE_TEMPLATE,
                                          query=query,
                                          search_result=result)
        return render_template_string(_WEB_PAGE_TEMPLATE,
                                      query=query,
                                      error=result.get('error', '搜索失败'))

    return app
def main(argv: List[str] = None):
    """Parse command-line options and start the selected mode.

    Args:
        argv: Argument list to parse (without the program name). ``None``
            (the default) makes argparse read ``sys.argv[1:]``, so calling
            ``main()`` behaves as before; passing a list allows programmatic
            invocation.

    Modes:
        monitor: start the RSS monitor.
        web: serve the Flask web interface on ``--port``.
        cli: run a single search when ``--query`` is given, otherwise
            enter the interactive prompt.
    """
    parser = argparse.ArgumentParser(description='智能搜索系统')
    parser.add_argument('--mode', choices=['cli', 'web', 'monitor'],
                        default='cli', help='运行模式')
    parser.add_argument('--query', type=str, help='搜索查询')
    parser.add_argument('--industry', type=str, help='搜索行业')
    parser.add_argument('--language', type=str, choices=['en', 'cn'], help='搜索语言')
    parser.add_argument('--export', action='store_true', help='导出结果')
    parser.add_argument('--port', type=int, default=5000, help='Web端口')
    args = parser.parse_args(argv)

    if args.mode == 'monitor':
        # RSS monitoring mode.
        print("🚀 启动RSS监控器...")
        from rss_monitor import start_rss_monitor
        start_rss_monitor()
    elif args.mode == 'web':
        # Web interface mode; create_web_app() returns None without Flask.
        app = create_web_app()
        if app:
            print(f"🌐 启动Web界面: http://localhost:{args.port}")
            # NOTE(review): host='0.0.0.0' listens on all interfaces even
            # though the printed URL mentions localhost — confirm intended.
            app.run(host='0.0.0.0', port=args.port, debug=False)
    elif args.mode == 'cli':
        # Command-line mode.
        cli = SearchSystemCLI()
        if args.query:
            # One-shot search from the command line.
            cli.run_search(args.query, args.industry, args.language, args.export)
        else:
            # No query given: enter the interactive prompt.
            cli.interactive_mode()
# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()