Files
20250715-66bfff96/代码实现/database_schema.sql
2026-04-25 19:21:03 +08:00

101 lines
3.8 KiB
SQL

-- 搜索系统数据库结构
-- 适用于 SQLite/MySQL/PostgreSQL
-- 1. 行业分类表
CREATE TABLE industries (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name_en VARCHAR(50) NOT NULL UNIQUE,
name_cn VARCHAR(50) NOT NULL,
description TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- 2. 信息源配置表
CREATE TABLE rss_sources (
id INTEGER PRIMARY KEY AUTOINCREMENT,
industry_id INTEGER NOT NULL,
source_name VARCHAR(100) NOT NULL,
source_url VARCHAR(500) NOT NULL,
source_type VARCHAR(20) NOT NULL, -- 'rss', 'api', 'manual'
authority_level INTEGER DEFAULT 3, -- 1=官方机构, 2=主流媒体, 3=专业平台, 4=其他
language VARCHAR(2) DEFAULT 'en', -- 'en', 'cn'
is_active BOOLEAN DEFAULT TRUE,
last_checked TIMESTAMP,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (industry_id) REFERENCES industries(id)
);
-- 3. 搜索记录表
CREATE TABLE search_logs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
keywords TEXT NOT NULL,
industry_id INTEGER,
language VARCHAR(2) DEFAULT 'en',
results_count INTEGER DEFAULT 0,
search_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
user_ip VARCHAR(45),
FOREIGN KEY (industry_id) REFERENCES industries(id)
);
-- 4. 文章内容表
CREATE TABLE articles (
id INTEGER PRIMARY KEY AUTOINCREMENT,
title TEXT NOT NULL,
content TEXT,
summary TEXT,
author VARCHAR(200),
source_id INTEGER NOT NULL,
original_url VARCHAR(1000) NOT NULL,
published_date TIMESTAMP,
scraped_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
language VARCHAR(2) DEFAULT 'en',
keywords TEXT, -- JSON格式存储关键词
article_hash VARCHAR(64) UNIQUE, -- 防重复
is_archived BOOLEAN DEFAULT FALSE,
FOREIGN KEY (source_id) REFERENCES rss_sources(id)
);
-- 5. 搜索结果表
CREATE TABLE search_results (
id INTEGER PRIMARY KEY AUTOINCREMENT,
search_log_id INTEGER NOT NULL,
article_id INTEGER NOT NULL,
relevance_score FLOAT DEFAULT 0.0,
rank_position INTEGER,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (search_log_id) REFERENCES search_logs(id),
FOREIGN KEY (article_id) REFERENCES articles(id)
);
-- 6. 导出文档记录表
CREATE TABLE exported_docs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
search_log_id INTEGER NOT NULL,
filename VARCHAR(255) NOT NULL,
file_path VARCHAR(500) NOT NULL,
doc_type VARCHAR(20) DEFAULT 'docx', -- 'docx', 'pdf', 'txt'
articles_count INTEGER DEFAULT 0,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (search_log_id) REFERENCES search_logs(id)
);
-- 插入基础数据
INSERT INTO industries (name_en, name_cn, description) VALUES
('finance', '金融行业', '银行、证券、保险、投资等金融服务'),
('ai_software', 'AI与软件', '人工智能、软件开发、技术创新'),
('manufacturing', '制造业', '工业制造、自动化、生产技术'),
('healthcare_pharma', '医疗制药', '医疗健康、制药、生物技术'),
('fmcg', '快消品', '快速消费品、零售、品牌营销'),
('ecommerce_retail', '零售电商', '电子商务、零售业、数字营销'),
('energy_chemical', '能源化工', '能源、化工、石油、新能源'),
('real_estate', '房地产建筑', '房地产、建筑、基础设施');
-- 创建索引优化查询性能
CREATE INDEX idx_articles_published_date ON articles(published_date);
CREATE INDEX idx_articles_source_id ON articles(source_id);
CREATE INDEX idx_articles_language ON articles(language);
CREATE INDEX idx_articles_hash ON articles(article_hash);
CREATE INDEX idx_search_logs_keywords ON search_logs(keywords);
CREATE INDEX idx_search_logs_time ON search_logs(search_time);
CREATE INDEX idx_rss_sources_industry ON rss_sources(industry_id);
CREATE INDEX idx_rss_sources_active ON rss_sources(is_active);