101 lines
3.8 KiB
SQL
101 lines
3.8 KiB
SQL
-- 搜索系统数据库结构
|
|
-- 适用于 SQLite/MySQL/PostgreSQL
|
|
|
|
-- 1. 行业分类表
|
|
CREATE TABLE industries (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
name_en VARCHAR(50) NOT NULL UNIQUE,
|
|
name_cn VARCHAR(50) NOT NULL,
|
|
description TEXT,
|
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
-- 2. 信息源配置表
|
|
CREATE TABLE rss_sources (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
industry_id INTEGER NOT NULL,
|
|
source_name VARCHAR(100) NOT NULL,
|
|
source_url VARCHAR(500) NOT NULL,
|
|
source_type VARCHAR(20) NOT NULL, -- 'rss', 'api', 'manual'
|
|
authority_level INTEGER DEFAULT 3, -- 1=官方机构, 2=主流媒体, 3=专业平台, 4=其他
|
|
language VARCHAR(2) DEFAULT 'en', -- 'en', 'cn'
|
|
is_active BOOLEAN DEFAULT TRUE,
|
|
last_checked TIMESTAMP,
|
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
FOREIGN KEY (industry_id) REFERENCES industries(id)
|
|
);
|
|
|
|
-- 3. 搜索记录表
|
|
CREATE TABLE search_logs (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
keywords TEXT NOT NULL,
|
|
industry_id INTEGER,
|
|
language VARCHAR(2) DEFAULT 'en',
|
|
results_count INTEGER DEFAULT 0,
|
|
search_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
user_ip VARCHAR(45),
|
|
FOREIGN KEY (industry_id) REFERENCES industries(id)
|
|
);
|
|
|
|
-- 4. 文章内容表
|
|
CREATE TABLE articles (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
title TEXT NOT NULL,
|
|
content TEXT,
|
|
summary TEXT,
|
|
author VARCHAR(200),
|
|
source_id INTEGER NOT NULL,
|
|
original_url VARCHAR(1000) NOT NULL,
|
|
published_date TIMESTAMP,
|
|
scraped_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
language VARCHAR(2) DEFAULT 'en',
|
|
keywords TEXT, -- JSON格式存储关键词
|
|
article_hash VARCHAR(64) UNIQUE, -- 防重复
|
|
is_archived BOOLEAN DEFAULT FALSE,
|
|
FOREIGN KEY (source_id) REFERENCES rss_sources(id)
|
|
);
|
|
|
|
-- 5. 搜索结果表
|
|
CREATE TABLE search_results (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
search_log_id INTEGER NOT NULL,
|
|
article_id INTEGER NOT NULL,
|
|
relevance_score FLOAT DEFAULT 0.0,
|
|
rank_position INTEGER,
|
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
FOREIGN KEY (search_log_id) REFERENCES search_logs(id),
|
|
FOREIGN KEY (article_id) REFERENCES articles(id)
|
|
);
|
|
|
|
-- 6. 导出文档记录表
|
|
CREATE TABLE exported_docs (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
search_log_id INTEGER NOT NULL,
|
|
filename VARCHAR(255) NOT NULL,
|
|
file_path VARCHAR(500) NOT NULL,
|
|
doc_type VARCHAR(20) DEFAULT 'docx', -- 'docx', 'pdf', 'txt'
|
|
articles_count INTEGER DEFAULT 0,
|
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
FOREIGN KEY (search_log_id) REFERENCES search_logs(id)
|
|
);
|
|
|
|
-- 插入基础数据
|
|
INSERT INTO industries (name_en, name_cn, description) VALUES
|
|
('finance', '金融行业', '银行、证券、保险、投资等金融服务'),
|
|
('ai_software', 'AI与软件', '人工智能、软件开发、技术创新'),
|
|
('manufacturing', '制造业', '工业制造、自动化、生产技术'),
|
|
('healthcare_pharma', '医疗制药', '医疗健康、制药、生物技术'),
|
|
('fmcg', '快消品', '快速消费品、零售、品牌营销'),
|
|
('ecommerce_retail', '零售电商', '电子商务、零售业、数字营销'),
|
|
('energy_chemical', '能源化工', '能源、化工、石油、新能源'),
|
|
('real_estate', '房地产建筑', '房地产、建筑、基础设施');
|
|
|
|
-- 创建索引优化查询性能
|
|
CREATE INDEX idx_articles_published_date ON articles(published_date);
|
|
CREATE INDEX idx_articles_source_id ON articles(source_id);
|
|
CREATE INDEX idx_articles_language ON articles(language);
|
|
CREATE INDEX idx_articles_hash ON articles(article_hash);
|
|
CREATE INDEX idx_search_logs_keywords ON search_logs(keywords);
|
|
CREATE INDEX idx_search_logs_time ON search_logs(search_time);
|
|
CREATE INDEX idx_rss_sources_industry ON rss_sources(industry_id);
|
|
CREATE INDEX idx_rss_sources_active ON rss_sources(is_active); |