# -*- coding: utf-8 -*-
"""
Main program of the search system.

Provides a command-line interface and a simple web interface.
"""

import argparse
import logging
import os
import sys
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional

# Add this file's directory to the Python path so the sibling modules below
# can be imported when the file is run as a script.
sys.path.append(str(Path(__file__).parent))

from config import LOGGING_CONFIG
from database import DatabaseManager
from search_engine import SearchEngine
from document_exporter import DocumentExporter
from rss_monitor import RSSMonitor


class SearchSystemCLI:
    """Command-line front end for the search system.

    Owns a database manager, a search engine, a document exporter and an RSS
    monitor, and exposes one-shot search, statistics, history and an
    interactive prompt on top of them.
    """

    # Display labels for the numeric authority level attached to an article.
    _AUTHORITY_LABELS = {1: '官方机构', 2: '主流媒体', 3: '专业平台', 4: '其他'}
    # Number of articles printed in the console preview.
    _PREVIEW_COUNT = 5
    # Maximum number of summary characters printed per article.
    _SUMMARY_LIMIT = 100

    def __init__(self):
        # Logging is configured first so the collaborators created below can
        # log during their own construction.
        self.setup_logging()
        self.db = DatabaseManager()
        self.search_engine = SearchEngine()
        self.exporter = DocumentExporter()
        self.rss_monitor = RSSMonitor()
        self.logger = logging.getLogger(__name__)

    def setup_logging(self):
        """Configure root logging from ``LOGGING_CONFIG`` (log file + console)."""
        logging.basicConfig(
            level=LOGGING_CONFIG['level'],
            format=LOGGING_CONFIG['format'],
            handlers=[
                logging.FileHandler(LOGGING_CONFIG['file'], encoding='utf-8'),
                logging.StreamHandler(),
            ],
        )

    def run_search(self, query: str, industry: Optional[str] = None,
                   language: Optional[str] = None, export: bool = False) -> Dict:
        """Run one search, print the outcome and optionally export it.

        Args:
            query: Search text passed to the search engine.
            industry: Industry filter, or ``None`` for all industries.
            language: Language passed to the engine, or ``None`` to let the
                engine decide.
            export: When true and the search returned results, export them
                through the document exporter.

        Returns:
            The result dict returned by the search engine, unchanged.
        """
        print(f"\n🔍 搜索查询: {query}")
        print(f"📊 行业: {industry or '全部'}")
        print(f"🌐 语言: {language or '自动检测'}")
        print("-" * 50)

        result = self.search_engine.search(
            query=query,
            industry=industry,
            language=language,
        )

        if not result['success']:
            print(f"❌ 搜索失败: {result.get('error', '未知错误')}")
            return result

        self.display_search_results(result)

        # Export only when requested and there is something to export.
        if export and result['results']:
            export_result = self.exporter.export_search_results(result['search_log_id'])
            if export_result['success']:
                print(f"\n📄 文档导出成功: {export_result['filename']}")
                print(f"📁 文件路径: {export_result['file_path']}")
            else:
                print(f"❌ 文档导出失败: {export_result.get('error', '未知错误')}")

        return result

    def display_search_results(self, result: Dict):
        """Print a summary of a successful search and a preview of its articles.

        Args:
            result: Search result dict; reads ``total_count``, ``search_time``,
                ``sources_searched['total_sources']`` and ``results``.
        """
        print("\n✅ 搜索完成!")
        print(f"📈 找到 {result['total_count']} 条结果")
        print(f"⏱️ 搜索耗时: {result['search_time']} 秒")
        print(f"🔗 检索源: {result['sources_searched']['total_sources']} 个")

        articles = result['results']
        if not articles:
            print("\n📭 没有找到相关结果")
            return

        print(f"\n📰 搜索结果预览 (前{self._PREVIEW_COUNT}条):")
        print("=" * 80)

        for i, article in enumerate(articles[:self._PREVIEW_COUNT], 1):
            authority = self.get_authority_text(article['authority_level'])
            # A score that is missing or None is displayed as 0.00, matching
            # the `or 0` guard used by the web template.
            score = article.get('final_score') or 0

            print(f"\n{i}. {article['title']}")
            print(f" 🏢 来源: {article['source_name']} ({authority})")
            print(f" 📅 时间: {self.format_date(article.get('published_date', ''))}")
            print(f" 🎯 相关性: {score:.2f}")
            print(f" 🔗 链接: {article['original_url']}")

            # Fall back to the article body when the summary is absent, None
            # or empty (a plain dict.get default only covers "absent").
            summary = article.get('summary') or article.get('content') or ''
            if summary:
                if len(summary) > self._SUMMARY_LIMIT:
                    summary = summary[:self._SUMMARY_LIMIT] + '...'
                print(f" 📝 摘要: {summary}")

        remaining = len(articles) - self._PREVIEW_COUNT
        if remaining > 0:
            print(f"\n... 还有 {remaining} 条结果")

    def get_authority_text(self, level: int) -> str:
        """Return the display label for an authority level (unknown -> '其他')."""
        return self._AUTHORITY_LABELS.get(level, '其他')

    def format_date(self, date_str: str) -> str:
        """Format a date value as ``YYYY-MM-DD`` for display.

        Args:
            date_str: ISO-8601 string (a ``Z`` marker is stripped before
                parsing) or an object with ``strftime`` such as ``datetime``.

        Returns:
            The formatted date, ``'未知'`` for a falsy value, or
            ``str(date_str)`` when the value cannot be parsed or formatted.
        """
        if not date_str:
            return '未知'
        try:
            if isinstance(date_str, str):
                date_obj = datetime.fromisoformat(date_str.replace('Z', ''))
            else:
                date_obj = date_str
            return date_obj.strftime('%Y-%m-%d')
        except (ValueError, TypeError, AttributeError):
            # Best effort: show the raw value rather than fail the listing.
            # Narrowed from a bare except so Ctrl-C / SystemExit propagate.
            return str(date_str)

    def show_statistics(self):
        """Print the system statistics returned by the database manager."""
        stats = self.db.get_statistics()

        print("\n📊 系统统计信息")
        print("=" * 40)
        print(f"📰 文章总数: {stats['total_articles']}")
        print(f"🆕 今日新增: {stats['today_articles']}")
        print(f"🔍 搜索总次数: {stats['total_searches']}")
        print(f"📡 活跃源数: {stats['active_sources']}")

        print("\n📈 各行业文章分布:")
        for item in stats['articles_by_industry'][:8]:
            print(f" {item['name_cn']}: {item['count']} 篇")

    def show_search_history(self, limit: int = 10):
        """Print the most recent search records.

        Args:
            limit: Maximum number of records requested from the database.
        """
        history = self.db.get_search_history(limit)

        print(f"\n📜 最近 {limit} 次搜索记录")
        print("=" * 60)

        for i, record in enumerate(history, 1):
            # `or` (not a .get default) so a stored None also shows as "all",
            # the same way run_search displays a missing industry.
            industry_name = record.get('industry_name') or '全部'
            print(f"{i}. {record['keywords']}")
            print(f" 行业: {industry_name} | "
                  f"结果: {record['results_count']} 条 | "
                  f"时间: {self.format_date(record['search_time'])}")

    def interactive_mode(self):
        """Run the interactive prompt until the user quits.

        Recognised commands (case-insensitive): ``quit``/``exit``/``q``,
        ``help``, ``stats``, ``history`` and ``search <query>``. Any other
        non-empty input is treated as a search query. Searches started here
        always export their results.
        """
        print("🚀 欢迎使用智能搜索系统!")
        print("输入 'help' 查看帮助,输入 'quit' 退出")

        while True:
            try:
                command = input("\n>>> ").strip()
                lowered = command.lower()

                if lowered in ('quit', 'exit', 'q'):
                    print("👋 再见!")
                    break
                elif lowered == 'help':
                    self.show_help()
                elif lowered == 'stats':
                    self.show_statistics()
                elif lowered == 'history':
                    self.show_search_history()
                elif lowered.startswith('search '):
                    # Drop the "search " prefix (7 characters).
                    self.run_search(command[7:].strip(), export=True)
                elif command:
                    # Anything else is treated as a direct search query.
                    self.run_search(command, export=True)
                else:
                    print("请输入搜索查询或命令")

            except (KeyboardInterrupt, EOFError):
                # EOF (closed or piped stdin) must end the session: input()
                # would raise it again on every iteration, so treating it as
                # an ordinary error would loop forever.
                print("\n👋 再见!")
                break
            except Exception as e:
                # Keep the prompt alive when a single command fails.
                print(f"❌ 错误: {e}")

    def show_help(self):
        """Print the interactive command reference."""
        help_text = "\n".join((
            "",
            "🆘 命令帮助:",
            "search <查询词> - 执行搜索",
            "stats - 查看统计信息",
            "history - 查看搜索历史",
            "help - 显示此帮助",
            "quit/exit/q - 退出程序",
            "",
            "🔍 搜索示例:",
            "search AI breakthrough 2024",
            "search 英伟达最新财报",
            "search renewable energy policy",
            "",
            "💡 提示:",
            "- 英文搜索会自动使用英文信源",
            "- 包含中文关键词会自动切换中文搜索",
            "- 搜索结果会自动导出为DOCX文档",
            "",
        ))
        print(help_text)


def create_web_app():
    """Build the Flask application that serves the minimal web UI.

    The app has a single route (``/``) that renders a search form on GET and
    runs a search through ``SearchSystemCLI().search_engine`` on POST.

    Returns:
        The configured Flask app, or ``None`` when Flask is not installed
        (an installation hint is printed in that case).
    """
    # Only the Flask import is guarded: an ImportError raised while building
    # SearchSystemCLI must not be misreported as "Flask is not installed".
    try:
        from flask import Flask, render_template_string, request
    except ImportError:
        print("Flask未安装,无法启动Web界面")
        print("请运行: pip install flask")
        return None

    app = Flask(__name__)
    cli = SearchSystemCLI()

    # Single-page template: search form, optional result list, optional error.
    HTML_TEMPLATE = """
    <!DOCTYPE html>
    <html>
    <head>
        <title>智能搜索系统</title>
        <meta charset="utf-8">
        <style>
            body { font-family: Arial, sans-serif; margin: 40px; }
            .header { text-align: center; margin-bottom: 30px; }
            .search-box { text-align: center; margin-bottom: 30px; }
            input[type="text"] { padding: 10px; width: 400px; font-size: 16px; }
            button { padding: 10px 20px; font-size: 16px; margin-left: 10px; }
            .results { margin-top: 30px; }
            .result-item { border: 1px solid #ddd; margin: 10px 0; padding: 15px; }
            .result-title { font-weight: bold; color: #2c5aa0; }
            .result-meta { color: #666; font-size: 14px; margin: 5px 0; }
            .result-summary { margin: 10px 0; }
            .stats { background: #f5f5f5; padding: 15px; margin: 20px 0; }
        </style>
    </head>
    <body>
        <div class="header">
            <h1>🔍 智能搜索系统</h1>
            <p>支持8个行业的权威信息搜索</p>
        </div>

        <div class="search-box">
            <form method="POST">
                <input type="text" name="query" placeholder="输入搜索关键词..." value="{{ query or '' }}">
                <select name="industry">
                    <option value="">全部行业</option>
                    <option value="finance">金融</option>
                    <option value="ai_software">AI/软件</option>
                    <option value="manufacturing">制造业</option>
                    <option value="healthcare_pharma">医疗制药</option>
                    <option value="fmcg">快消品</option>
                    <option value="ecommerce_retail">零售电商</option>
                    <option value="energy_chemical">能源化工</option>
                    <option value="real_estate">房地产建筑</option>
                </select>
                <button type="submit">搜索</button>
            </form>
        </div>

        {% if search_result %}
        <div class="stats">
            <strong>搜索结果:</strong> {{ search_result.total_count }} 条 |
            <strong>耗时:</strong> {{ search_result.search_time }} 秒 |
            <strong>信源:</strong> {{ search_result.sources_searched.total_sources }} 个
        </div>

        <div class="results">
            {% for article in search_result.results[:10] %}
            {# Summary may be missing or None; fall back to the body text. #}
            {% set summary = article.summary or article.content or '' %}
            <div class="result-item">
                <div class="result-title">{{ loop.index }}. {{ article.title }}</div>
                <div class="result-meta">
                    📰 {{ article.source_name }} |
                    📅 {{ article.published_date or '未知时间' }} |
                    🎯 相关性: {{ "%.2f"|format(article.final_score or 0) }}
                </div>
                <div class="result-summary">{{ summary[:200] }}{% if summary|length > 200 %}...{% endif %}</div>
                <div><a href="{{ article.original_url }}" target="_blank">🔗 查看原文</a></div>
            </div>
            {% endfor %}
        </div>
        {% endif %}

        {% if error %}
        <div style="color: red; text-align: center;">
            ❌ {{ error }}
        </div>
        {% endif %}
    </body>
    </html>
    """

    @app.route('/', methods=['GET', 'POST'])
    def index():
        """Render the search page; on POST, run the search and show results."""
        if request.method != 'POST':
            return render_template_string(HTML_TEMPLATE)

        query = request.form.get('query', '').strip()
        # An empty selection means "all industries".
        industry = request.form.get('industry', '') or None

        if not query:
            return render_template_string(HTML_TEMPLATE,
                                          query=query,
                                          error='请输入搜索关键词')

        try:
            result = cli.search_engine.search(query, industry)
            if result['success']:
                return render_template_string(HTML_TEMPLATE,
                                              query=query,
                                              search_result=result)
            return render_template_string(HTML_TEMPLATE,
                                          query=query,
                                          error=result.get('error', '搜索失败'))
        except Exception as e:
            # Request boundary: show the error on the page, but keep the
            # traceback in the log instead of dropping it.
            cli.logger.exception("Web search failed for query %r", query)
            return render_template_string(HTML_TEMPLATE,
                                          query=query,
                                          error=str(e))

    return app


def main():
    """Parse command-line options and start the selected run mode.

    Modes: ``monitor`` starts the RSS monitor, ``web`` serves the Flask UI,
    and ``cli`` (the default) either runs a single search when ``--query``
    is given or opens the interactive prompt.
    """
    parser = argparse.ArgumentParser(description='智能搜索系统')

    # (flag, add_argument keyword options), registered in declaration order.
    option_specs = (
        ('--mode', {'choices': ['cli', 'web', 'monitor'],
                    'default': 'cli', 'help': '运行模式'}),
        ('--query', {'type': str, 'help': '搜索查询'}),
        ('--industry', {'type': str, 'help': '搜索行业'}),
        ('--language', {'type': str, 'choices': ['en', 'cn'], 'help': '搜索语言'}),
        ('--export', {'action': 'store_true', 'help': '导出结果'}),
        ('--port', {'type': int, 'default': 5000, 'help': 'Web端口'}),
    )
    for flag, options in option_specs:
        parser.add_argument(flag, **options)

    args = parser.parse_args()
    mode = args.mode

    if mode == 'monitor':
        # RSS monitoring mode.
        print("🚀 启动RSS监控器...")
        from rss_monitor import start_rss_monitor
        start_rss_monitor()
        return

    if mode == 'web':
        # Web UI mode; create_web_app() returns None when Flask is missing.
        app = create_web_app()
        if not app:
            return
        print(f"🌐 启动Web界面: http://localhost:{args.port}")
        # NOTE(review): the server binds to all interfaces (0.0.0.0) although
        # the message above advertises localhost; confirm this is intended.
        app.run(host='0.0.0.0', port=args.port, debug=False)
        return

    if mode == 'cli':
        # Command-line mode.
        cli = SearchSystemCLI()
        if not args.query:
            # No query given: open the interactive prompt.
            cli.interactive_mode()
            return
        # A query was given: run it once and exit.
        cli.run_search(args.query, args.industry, args.language, args.export)


if __name__ == "__main__":
    main()