# -*- coding: utf-8 -*-
"""
Search system entry point.

Provides a command-line interface (one-shot and interactive), a minimal
Flask web interface, and a launcher for the RSS monitor.
"""

import os
import sys
import logging
import argparse
from datetime import datetime
from typing import Dict, List, Optional
from pathlib import Path

# Make sibling modules importable when this file is run as a script.
sys.path.append(str(Path(__file__).parent))

from config import LOGGING_CONFIG
from database import DatabaseManager
from search_engine import SearchEngine
from document_exporter import DocumentExporter
from rss_monitor import RSSMonitor


class SearchSystemCLI:
    """Command-line front end for the search system."""

    def __init__(self):
        """Configure logging and create the collaborating components."""
        self.setup_logging()
        self.db = DatabaseManager()
        self.search_engine = SearchEngine()
        self.exporter = DocumentExporter()
        self.rss_monitor = RSSMonitor()
        self.logger = logging.getLogger(__name__)

    def setup_logging(self):
        """Configure root logging to a UTF-8 file and the console.

        Level, format and file path are read from ``LOGGING_CONFIG``.
        """
        logging.basicConfig(
            level=LOGGING_CONFIG['level'],
            format=LOGGING_CONFIG['format'],
            handlers=[
                logging.FileHandler(LOGGING_CONFIG['file'], encoding='utf-8'),
                logging.StreamHandler(),
            ],
        )

    def run_search(self, query: str, industry: Optional[str] = None,
                   language: Optional[str] = None,
                   export: bool = False) -> Dict:
        """Run one search, print the outcome and optionally export it.

        Args:
            query: Search query text.
            industry: Industry filter passed through to the search engine;
                ``None`` is displayed as "all".
            language: Language passed through to the search engine;
                ``None`` is displayed as "auto-detect".
            export: When true and the search produced results, export them
                via the document exporter.

        Returns:
            The result dict returned by ``SearchEngine.search`` (returned
            unchanged on both success and failure).
        """
        print(f"\n🔍 搜索查询: {query}")
        print(f"📊 行业: {industry or '全部'}")
        print(f"🌐 语言: {language or '自动检测'}")
        print("-" * 50)

        result = self.search_engine.search(
            query=query,
            industry=industry,
            language=language,
        )

        if not result['success']:
            print(f"❌ 搜索失败: {result.get('error', '未知错误')}")
            return result

        self.display_search_results(result)

        # Export only when asked to and there is something to export.
        if export and result['results']:
            export_result = self.exporter.export_search_results(
                result['search_log_id']
            )
            if export_result['success']:
                print(f"\n📄 文档导出成功: {export_result['filename']}")
                print(f"📁 文件路径: {export_result['file_path']}")
            else:
                print(f"❌ 文档导出失败: {export_result.get('error', '未知错误')}")

        return result

    def display_search_results(self, result: Dict):
        """Print a summary of a successful search and preview the top five.

        Args:
            result: Search result dict; this method reads ``total_count``,
                ``search_time``, ``sources_searched['total_sources']`` and
                the ``results`` list of article dicts.
        """
        print("\n✅ 搜索完成!")
        print(f"📈 找到 {result['total_count']} 条结果")
        print(f"⏱️ 搜索耗时: {result['search_time']} 秒")
        print(f"🔗 检索源: {result['sources_searched']['total_sources']} 个")

        articles = result['results']
        if not articles:
            print("\n📭 没有找到相关结果")
            return

        print("\n📰 搜索结果预览 (前5条):")
        print("=" * 80)

        for i, article in enumerate(articles[:5], 1):
            authority = self.get_authority_text(article['authority_level'])
            print(f"\n{i}. {article['title']}")
            print(f" 🏢 来源: {article['source_name']} ({authority})")
            print(f" 📅 时间: {self.format_date(article.get('published_date', ''))}")
            print(f" 🎯 相关性: {article.get('final_score', 0):.2f}")
            print(f" 🔗 链接: {article['original_url']}")

            # Fall back to the content when the summary is missing *or*
            # present but empty/None (dict.get's default only covers a
            # missing key).
            summary = article.get('summary') or article.get('content') or ''
            if summary:
                summary = summary[:100] + '...' if len(summary) > 100 else summary
                print(f" 📝 摘要: {summary}")

        if len(articles) > 5:
            print(f"\n... 还有 {len(articles) - 5} 条结果")

    def get_authority_text(self, level: int) -> str:
        """Return the display label for an authority level (1-4).

        Unknown levels map to the same label as level 4.
        """
        authority_map = {1: '官方机构', 2: '主流媒体', 3: '专业平台', 4: '其他'}
        return authority_map.get(level, '其他')

    def format_date(self, date_str: str) -> str:
        """Format a date value as ``YYYY-MM-DD`` for display.

        Args:
            date_str: An ISO-8601 string (any ``Z`` is removed before
                parsing) or an object with ``strftime`` such as a
                ``datetime``.

        Returns:
            The formatted date, the "unknown" label for a falsy value, or
            ``str(date_str)`` when the value cannot be parsed/formatted.
        """
        if not date_str:
            return '未知'

        try:
            if isinstance(date_str, str):
                date_obj = datetime.fromisoformat(date_str.replace('Z', ''))
            else:
                date_obj = date_str
            return date_obj.strftime('%Y-%m-%d')
        except (ValueError, TypeError, AttributeError):
            # Best effort: show the raw value rather than fail the display.
            return str(date_str)

    def show_statistics(self):
        """Print system statistics obtained from the database manager."""
        stats = self.db.get_statistics()

        print("\n📊 系统统计信息")
        print("=" * 40)
        print(f"📰 文章总数: {stats['total_articles']}")
        print(f"🆕 今日新增: {stats['today_articles']}")
        print(f"🔍 搜索总次数: {stats['total_searches']}")
        print(f"📡 活跃源数: {stats['active_sources']}")

        print("\n📈 各行业文章分布:")
        for item in stats['articles_by_industry'][:8]:
            print(f" {item['name_cn']}: {item['count']} 篇")

    def show_search_history(self, limit: int = 10):
        """Print the most recent search records.

        Args:
            limit: Maximum number of records requested from the database.
        """
        history = self.db.get_search_history(limit)

        print(f"\n📜 最近 {limit} 次搜索记录")
        print("=" * 60)

        for i, record in enumerate(history, 1):
            # `or` also covers a key that is present but None.
            industry_name = record.get('industry_name') or '全部'
            print(f"{i}. {record['keywords']}")
            print(f" 行业: {industry_name} | "
                  f"结果: {record['results_count']} 条 | "
                  f"时间: {self.format_date(record['search_time'])}")

    def interactive_mode(self):
        """Run the interactive prompt until the user quits.

        Recognised commands (case-insensitive): ``quit``/``exit``/``q``,
        ``help``, ``stats``, ``history`` and ``search <query>``. Any other
        non-empty input is treated as a search query. Searches run with
        export enabled. Ctrl-C or end-of-input ends the session.
        """
        print("🚀 欢迎使用智能搜索系统!")
        print("输入 'help' 查看帮助,输入 'quit' 退出")

        while True:
            try:
                command = input("\n>>> ").strip()
                lowered = command.lower()

                if lowered in ('quit', 'exit', 'q'):
                    print("👋 再见!")
                    break
                elif lowered == 'help':
                    self.show_help()
                elif lowered == 'stats':
                    self.show_statistics()
                elif lowered == 'history':
                    self.show_search_history()
                elif lowered.startswith('search '):
                    # Drop the 7-character "search " prefix.
                    query = command[7:].strip()
                    self.run_search(query, export=True)
                elif command:
                    # Bare text is treated as a query.
                    self.run_search(command, export=True)
                else:
                    print("请输入搜索查询或命令")

            except (KeyboardInterrupt, EOFError):
                # EOFError must end the loop: once stdin is closed every
                # further input() call raises again, which would otherwise
                # spin forever in the generic handler below.
                print("\n👋 再见!")
                break
            except Exception as e:
                # Top-level boundary of the REPL: report and keep going.
                print(f"❌ 错误: {e}")

    def show_help(self):
        """Print the command reference for interactive mode."""
        help_text = """
🆘 命令帮助:
 search <查询词> - 执行搜索
 stats - 查看统计信息
 history - 查看搜索历史
 help - 显示此帮助
 quit/exit/q - 退出程序

🔍 搜索示例:
 search AI breakthrough 2024
 search 英伟达最新财报
 search renewable energy policy

💡 提示:
 - 英文搜索会自动使用英文信源
 - 包含中文关键词会自动切换中文搜索
 - 搜索结果会自动导出为DOCX文档
"""
        print(help_text)


def create_web_app():
    """Create the minimal Flask web interface.

    Returns:
        The configured Flask application, or ``None`` (after printing an
        installation hint) when Flask is not installed.
    """
    # Only the flask import is guarded, so an ImportError raised while
    # building the search components is not misreported as "Flask missing".
    try:
        from flask import Flask, render_template_string, request
    except ImportError:
        print("Flask未安装,无法启动Web界面")
        print("请运行: pip install flask")
        return None

    app = Flask(__name__)
    cli = SearchSystemCLI()

    # Page template.
    # NOTE(review): as seen here the template contains no HTML markup and no
    # form posting `query`/`industry` — presumably the tags were stripped
    # from this copy of the file; verify against the deployed template.
    HTML_TEMPLATE = """ 智能搜索系统

🔍 智能搜索系统

支持8个行业的权威信息搜索

{% if search_result %}
搜索结果: {{ search_result.total_count }} 条 | 耗时: {{ search_result.search_time }} 秒 | 信源: {{ search_result.sources_searched.total_sources }} 个
{% for article in search_result.results[:10] %}
{{ loop.index }}. {{ article.title }}
📰 {{ article.source_name }} | 📅 {{ article.published_date or '未知时间' }} | 🎯 相关性: {{ "%.2f"|format(article.final_score or 0) }}
{{ article.summary[:200] }}...
🔗 查看原文
{% endfor %}
{% endif %} {% if error %}
❌ {{ error }}
{% endif %} """

    @app.route('/', methods=['GET', 'POST'])
    def index():
        """Render the search page; on POST run the search and show results."""
        if request.method != 'POST':
            return render_template_string(HTML_TEMPLATE)

        query = request.form.get('query', '').strip()
        industry = request.form.get('industry', '') or None

        if not query:
            return render_template_string(
                HTML_TEMPLATE, query=query, error='请输入搜索关键词'
            )

        try:
            result = cli.search_engine.search(query, industry)
            if result['success']:
                return render_template_string(
                    HTML_TEMPLATE, query=query, search_result=result
                )
            return render_template_string(
                HTML_TEMPLATE, query=query,
                error=result.get('error', '搜索失败'),
            )
        except Exception as e:
            # Request boundary: keep the page usable, but record the
            # traceback so the failure is not lost.
            cli.logger.exception("Web search failed for query %r", query)
            return render_template_string(
                HTML_TEMPLATE, query=query, error=str(e)
            )

    return app


def main():
    """Parse command-line arguments and start the selected mode.

    Modes: ``cli`` (default; one-shot search when ``--query`` is given,
    otherwise interactive), ``web`` (Flask app on ``--port``) and
    ``monitor`` (RSS monitor).
    """
    parser = argparse.ArgumentParser(description='智能搜索系统')
    parser.add_argument('--mode', choices=['cli', 'web', 'monitor'],
                        default='cli', help='运行模式')
    parser.add_argument('--query', type=str, help='搜索查询')
    parser.add_argument('--industry', type=str, help='搜索行业')
    parser.add_argument('--language', type=str, choices=['en', 'cn'],
                        help='搜索语言')
    parser.add_argument('--export', action='store_true', help='导出结果')
    parser.add_argument('--port', type=int, default=5000, help='Web端口')

    args = parser.parse_args()

    if args.mode == 'monitor':
        # RSS monitor mode.
        print("🚀 启动RSS监控器...")
        from rss_monitor import start_rss_monitor
        start_rss_monitor()

    elif args.mode == 'web':
        # Web interface mode.
        app = create_web_app()
        if app is not None:
            print(f"🌐 启动Web界面: http://localhost:{args.port}")
            # NOTE: binds to all interfaces although the printed URL says
            # localhost.
            app.run(host='0.0.0.0', port=args.port, debug=False)

    elif args.mode == 'cli':
        # Command-line mode.
        cli = SearchSystemCLI()

        if args.query:
            # One-shot search.
            cli.run_search(args.query, args.industry, args.language,
                           args.export)
        else:
            # Interactive prompt.
            cli.interactive_mode()


if __name__ == "__main__":
    main()