/**
 * scripts/build-resource-index.mjs — resource index tooling.
 *
 * Provenance: this script was introduced by the patch
 *   commit  af8d4826a167f55eceeb4f5a91e308ff83038e3b
 *   author  kang
 *   date    Tue, 19 May 2026 17:15:49 +0800
 *   subject [PATCH] chore: add resource index tooling
 * (3 files changed: RULES.md, package.json, scripts/build-resource-index.mjs).
 *
 * Companion changes carried by the same patch:
 *   - RULES.md gained the rule (translated): "Resource index: run
 *     `npm run resources:index` to generate `data/resource-index.json`,
 *     `data/resource-index.md` and human-readable symlinks under
 *     `data/named/`; original resource file names must not be renamed
 *     directly, to avoid breaking session JSON / image URL links."
 *   - package.json gained the script
 *     "resources:index": "node scripts/build-resource-index.mjs data".
 *
 * Usage:
 *   node scripts/build-resource-index.mjs [dataDir] [--no-aliases]
 *
 * What it does:
 *   1. Reads every `<dataDir>/sessions/*.json` that has an `id`.
 *   2. Collects the image URLs each session references (uploads, candidates,
 *      selected copies, the clean reference anchor, pack assets) and maps
 *      `/api/img/<a>/<b...>` to `<dataDir>/<a>/<b...>`.
 *   3. Writes `<dataDir>/resource-index.json` and `<dataDir>/resource-index.md`.
 *   4. Unless `--no-aliases` is given, creates readable symlinks under
 *      `<dataDir>/named/`; the original files are never renamed.
 */

import fs from 'node:fs';
import path from 'node:path';

/** The eight bytes every PNG file starts with. */
const PNG_SIGNATURE = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]);

/** Maximum length of a slug produced by `safeSlug`. */
const MAX_SLUG_LENGTH = 72;

/** Files directly under the data directory that are never reported as unreferenced. */
const IGNORED_TOP_LEVEL_FILES = new Set(['app.db', 'resource-index.json', 'resource-index.md']);

/**
 * Parse the command line.
 *
 * The data directory is the first argument that is not a `--flag`, so
 * `node script --no-aliases` no longer mistakes the flag for a directory.
 *
 * @param {string[]} argv - Full `process.argv`-style array.
 * @returns {{ dataDir: string, makeAliases: boolean }}
 */
function parseCliArgs(argv) {
  const args = argv.slice(2);
  const positional = args.find(arg => !arg.startsWith('--'));
  return {
    dataDir: path.resolve(positional || 'data'),
    makeAliases: !args.includes('--no-aliases'),
  };
}

const { dataDir, makeAliases } = parseCliArgs(process.argv);
const generatedAt = new Date().toISOString();

/**
 * Read and parse a JSON file.
 *
 * @param {string} filePath - File to read.
 * @param {*} [fallback=null] - Value returned when the file cannot be read or parsed.
 * @returns {*} Parsed JSON, or `fallback`.
 */
function readJson(filePath, fallback = null) {
  let text;
  try {
    text = fs.readFileSync(filePath, 'utf8');
  } catch (err) {
    // A missing file is an expected absence; anything else deserves a warning.
    if (err.code !== 'ENOENT') console.warn(`Cannot read ${filePath}: ${err.message}`);
    return fallback;
  }
  try {
    return JSON.parse(text);
  } catch (err) {
    // Warn instead of dropping a corrupt file from the index without a trace.
    console.warn(`Invalid JSON in ${filePath}: ${err.message}`);
    return fallback;
  }
}

/**
 * Recursively list the regular files below `dir` (symlinks are not followed).
 *
 * @param {string} dir - Directory to walk; a missing directory yields no files.
 * @param {string[]} [out=[]] - Accumulator; also the return value.
 * @returns {string[]} Paths of regular files, each prefixed with `dir`.
 */
function listFiles(dir, out = []) {
  if (!fs.existsSync(dir)) return out;
  for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
    const full = path.join(dir, entry.name);
    if (entry.isDirectory()) listFiles(full, out);
    else if (entry.isFile()) out.push(full);
  }
  return out;
}

/**
 * Turn arbitrary text into a lowercase `a-z0-9-` slug of at most 72 characters.
 *
 * Every run of other characters (including all non-ASCII text) becomes a
 * single hyphen, so purely non-ASCII input yields `fallback`.
 *
 * @param {*} input - Value to slugify; `null`/`undefined` count as empty.
 * @param {string} [fallback='item'] - Returned when the slug would be empty.
 * @returns {string}
 */
function safeSlug(input, fallback = 'item') {
  const slug = String(input ?? '')
    .trim()
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-+|-+$/g, '')
    .slice(0, MAX_SLUG_LENGTH)
    // Truncation can cut right after a separator; do not leave it dangling.
    .replace(/-+$/, '');
  return slug || fallback;
}

/**
 * Map a public `/api/img/<a>/<b...>` URL to `<dataDir>/<a>/<b...>`.
 *
 * @param {*} url - Candidate URL; anything that is not such a string yields `null`.
 * @returns {string|null} Absolute file path, or `null` when the URL is not a
 *   mappable image URL or contains `.`/`..` segments.
 */
function publicUrlToFile(url) {
  if (!url || typeof url !== 'string') return null;
  if (!url.startsWith('/api/img/')) return null;
  // Query string and fragment are not part of the file name.
  const [pathname] = url.split(/[?#]/);
  const parts = pathname.split('/').filter(Boolean);
  if (parts.length < 4) return null;
  const segments = parts.slice(2);
  // Session JSON is data, not code: never let it point outside the data directory.
  if (segments.some(segment => segment === '.' || segment === '..')) return null;
  return path.join(dataDir, ...segments);
}

/**
 * Extension (with leading dot) of a URL path, ignoring query and fragment.
 *
 * @param {*} url
 * @returns {string} The extension, or `.png` when the path has none.
 */
function extFromUrl(url) {
  const [clean] = String(url || '').split(/[?#]/);
  return path.extname(clean) || '.png';
}

/**
 * Read pixel dimensions from the IHDR chunk of a PNG.
 *
 * @param {Buffer} buffer - File contents (at least the first 24 bytes).
 * @returns {{ width: number, height: number }|null} `null` when not a PNG.
 */
function pngSize(buffer) {
  if (buffer.length < 24) return null;
  if (!buffer.subarray(0, 8).equals(PNG_SIGNATURE)) return null;
  // The first chunk must be IHDR; its width/height follow the 4-byte chunk type.
  if (buffer.toString('latin1', 12, 16) !== 'IHDR') return null;
  return { width: buffer.readUInt32BE(16), height: buffer.readUInt32BE(20) };
}

/**
 * Whether a JPEG marker byte is a start-of-frame marker.
 * SOFn is 0xC0–0xCF except 0xC4 (DHT), 0xC8 (JPG) and 0xCC (DAC).
 *
 * @param {number} marker
 * @returns {boolean}
 */
function isJpegFrameMarker(marker) {
  return marker >= 0xc0 && marker <= 0xcf && marker !== 0xc4 && marker !== 0xc8 && marker !== 0xcc;
}

/**
 * Read pixel dimensions from the first start-of-frame segment of a JPEG.
 *
 * Truncated or malformed data yields `null` instead of throwing, so one bad
 * file cannot abort the whole index run.
 *
 * @param {Buffer} buffer - File contents.
 * @returns {{ width: number, height: number }|null}
 */
function jpegSize(buffer) {
  if (buffer.length < 4 || buffer[0] !== 0xff || buffer[1] !== 0xd8) return null;
  let offset = 2;
  // Each iteration needs the marker (2 bytes) and the segment length (2 bytes).
  while (offset + 4 <= buffer.length) {
    if (buffer[offset] !== 0xff) return null;
    const marker = buffer[offset + 1];
    if (marker === 0xff) {
      // Fill byte before the real marker.
      offset += 1;
      continue;
    }
    if (marker === 0x01 || (marker >= 0xd0 && marker <= 0xd7)) {
      // TEM and RSTn carry no length field.
      offset += 2;
      continue;
    }
    // End of image, or start of entropy-coded data: no frame header to find.
    if (marker === 0xd9 || marker === 0xda) return null;
    const length = buffer.readUInt16BE(offset + 2);
    if (length < 2) return null;
    if (isJpegFrameMarker(marker)) {
      // Layout after the marker: length(2) precision(1) height(2) width(2).
      if (offset + 9 > buffer.length) return null;
      return { width: buffer.readUInt16BE(offset + 7), height: buffer.readUInt16BE(offset + 5) };
    }
    offset += 2 + length;
  }
  return null;
}

/**
 * Read the nominal size of an SVG from its root `<svg>` tag.
 *
 * `width`/`height` are used when both are plain numbers (optionally `px`);
 * otherwise the third and fourth `viewBox` numbers are used.
 *
 * @param {string} text - SVG source.
 * @returns {{ width: number, height: number }|null}
 */
function svgSize(text) {
  const openTag = text.match(/<svg\b[^>]*>/i)?.[0] || '';
  // `\s` before the name keeps `stroke-width` from matching; the lookahead
  // rejects percentages and other units, which are not pixel sizes.
  const width = openTag.match(/\swidth\s*=\s*["']?([0-9.]+)(?:px)?(?=["'\s\/>])/i)?.[1];
  const height = openTag.match(/\sheight\s*=\s*["']?([0-9.]+)(?:px)?(?=["'\s\/>])/i)?.[1];
  const viewBox = openTag.match(
    /\sviewBox\s*=\s*["']\s*[-0-9.]+[\s,]+[-0-9.]+[\s,]+([0-9.]+)[\s,]+([0-9.]+)/i,
  );
  if (width && height) return { width: Number(width), height: Number(height) };
  if (viewBox) return { width: Number(viewBox[1]), height: Number(viewBox[2]) };
  return null;
}

/**
 * Collect existence, size, mtime and pixel dimensions for an image file.
 *
 * @param {string|null} filePath - Absolute path, or `null`.
 * @returns {{ exists: false } | { exists: true, sizeBytes: number, mtime: string,
 *   width: number|null, height: number|null, ratio: number|null }}
 */
function imageMeta(filePath) {
  if (!filePath) return { exists: false };
  let stat;
  try {
    stat = fs.statSync(filePath);
  } catch {
    // Any stat failure means the asset is not usable from here.
    return { exists: false };
  }
  // A URL can resolve to a directory; reading it would throw EISDIR.
  if (!stat.isFile()) return { exists: false };

  const buffer = fs.readFileSync(filePath);
  const ext = path.extname(filePath).toLowerCase();
  // PNG and JPEG are recognised by content, so a mislabelled extension still
  // gets dimensions; SVG is text and is only parsed for `.svg` files.
  const size =
    pngSize(buffer)
    || jpegSize(buffer)
    || (ext === '.svg' ? svgSize(buffer.toString('utf8')) : null);

  return {
    exists: true,
    sizeBytes: stat.size,
    mtime: stat.mtime.toISOString(),
    width: size?.width || null,
    height: size?.height || null,
    ratio: size?.width && size?.height ? Number((size.width / size.height).toFixed(4)) : null,
  };
}

/**
 * Normalise a collection stored either as an array or as a keyed object.
 *
 * @param {*} packs
 * @returns {Array} The array itself, the object's values, or `[]`.
 */
function packValues(packs) {
  if (Array.isArray(packs)) return packs;
  if (packs && typeof packs === 'object') return Object.values(packs);
  return [];
}

/**
 * Build the index record for one referenced image URL.
 *
 * @param {object} fields
 * @param {object} fields.session - Session the asset belongs to (`id`, `prompt` are read).
 * @param {string} fields.section - Grouping used for the alias sub-folder.
 * @param {string} [fields.title]
 * @param {string} [fields.id]
 * @param {string} [fields.url] - Public image URL.
 * @param {string} [fields.kind]
 * @param {string} [fields.view]
 * @param {string} [fields.templateId]
 * @param {string} [fields.source]
 * @returns {object} Record including `path` (relative to the data directory,
 *   or `null`), `displayName` and the fields from `imageMeta`.
 */
function fileRecordFromUrl({ session, section, title, id, url, kind, view, templateId, source }) {
  const filePath = publicUrlToFile(url);
  const nameParts = [
    safeSlug(kind || section, 'asset'),
    safeSlug(templateId || view || title || id, 'asset'),
    safeSlug(id, 'id'),
  ];
  return {
    sessionId: session.id,
    sessionPrompt: session.prompt || '',
    section,
    source,
    id,
    title: title || templateId || view || id,
    kind: kind || null,
    view: view || null,
    templateId: templateId || null,
    url: url || null,
    path: filePath ? path.relative(dataDir, filePath) : null,
    displayName: `${nameParts.join('_')}${extFromUrl(url)}`,
    ...imageMeta(filePath),
  };
}

/**
 * Folder name used for a session under `named/`: `NN-<slug>`.
 *
 * @param {object} session
 * @param {number} index - Zero-based position of the session.
 * @returns {string}
 */
function sessionSlug(session, index) {
  const name = session.characterSpec?.name || session.prompt || session.id;
  return `${String(index + 1).padStart(2, '0')}-${safeSlug(name, session.id)}`;
}

/**
 * Pick an alias path in `dir` that no other alias of this run already uses.
 * On a clash `-2`, `-3`, ... is inserted before the extension.
 *
 * @param {string} dir
 * @param {string} fileName
 * @param {Set<string>} taken - Paths handed out so far; updated in place.
 * @returns {string}
 */
function uniqueAliasPath(dir, fileName, taken) {
  const ext = path.extname(fileName);
  const stem = ext ? fileName.slice(0, -ext.length) : fileName;
  let candidate = path.join(dir, fileName);
  for (let n = 2; taken.has(candidate); n += 1) {
    candidate = path.join(dir, `${stem}-${n}${ext}`);
  }
  taken.add(candidate);
  return candidate;
}

/**
 * Create `aliasPath` as a symlink to `target`, replacing an older symlink.
 *
 * @param {string} aliasPath
 * @param {string} target - Link target (relative to the alias directory).
 * @returns {boolean} `false` when something that is not a symlink already
 *   occupies `aliasPath`; it is left untouched.
 * @throws Any `lstat`/`unlink`/`symlink` error other than "alias does not exist".
 */
function replaceSymlink(aliasPath, target) {
  let current = null;
  try {
    current = fs.lstatSync(aliasPath);
  } catch (err) {
    if (err.code !== 'ENOENT') throw err;
  }
  if (current) {
    if (!current.isSymbolicLink()) return false;
    fs.unlinkSync(aliasPath);
  }
  fs.symlinkSync(target, aliasPath);
  return true;
}

/**
 * Create the readable symlink for one indexed asset.
 *
 * @param {object} asset - Record from `fileRecordFromUrl`.
 * @param {string} sessionAliasDir - `named/<session folder>` directory.
 * @param {Set<string>} [takenPaths] - Alias paths already used in this run.
 * @returns {{ path: string, target: string } | { path: string, skipped: string } | null}
 *   `null` when the asset has no file on disk.
 */
function ensureAlias(asset, sessionAliasDir, takenPaths = new Set()) {
  if (!asset.path || !asset.exists) return null;
  const aliasDir = path.join(sessionAliasDir, safeSlug(asset.section || 'asset'));
  fs.mkdirSync(aliasDir, { recursive: true });
  const aliasPath = uniqueAliasPath(aliasDir, asset.displayName, takenPaths);
  const target = path.relative(aliasDir, path.join(dataDir, asset.path));
  const relativeAlias = path.relative(dataDir, aliasPath);
  if (!replaceSymlink(aliasPath, target)) {
    return { path: relativeAlias, skipped: 'existing non-symlink' };
  }
  return { path: relativeAlias, target: asset.path };
}

/**
 * Alias file name for an unreferenced file: slug of its relative path
 * (without the trailing extension) plus that extension.
 *
 * @param {string} file - Path relative to the data directory.
 * @returns {string}
 */
function unindexedAliasName(file) {
  const ext = path.extname(file);
  // Strip only the trailing extension; the same text may occur earlier in the path.
  const stem = ext ? file.slice(0, -ext.length) : file;
  return `${safeSlug(stem, 'unindexed')}${ext}`;
}

/**
 * Load every session file that parses and has an `id`, sorted by file name.
 *
 * @returns {{ file: string, data: object }[]}
 */
function loadSessions() {
  const sessionsDir = path.join(dataDir, 'sessions');
  if (!fs.existsSync(sessionsDir)) return [];
  return fs.readdirSync(sessionsDir)
    .filter(name => name.endsWith('.json'))
    .sort()
    .map(name => {
      const file = path.join(sessionsDir, name);
      return { file, data: readJson(file, null) };
    })
    .filter(item => item.data?.id);
}

/**
 * Collect the asset records for every image URL a session references.
 *
 * @param {object} session - Parsed session JSON.
 * @returns {object[]} Records in the order: uploads, candidates (each followed
 *   by its selected copy), clean reference anchor, pack assets.
 */
function collectSessionAssets(session) {
  const assets = [];
  const add = fields => {
    // Entries without a URL reference nothing on disk.
    if (fields.url) assets.push(fileRecordFromUrl({ session, ...fields }));
  };

  for (const upload of packValues(session.uploadedImages)) {
    if (!upload) continue;
    add({
      section: 'uploads',
      source: 'uploaded_image',
      id: upload.id,
      title: upload.originalFilename || upload.filename || upload.role,
      url: upload.url,
      kind: upload.role,
      view: upload.role,
      templateId: upload.role,
    });
  }

  for (const image of packValues(session.images)) {
    if (!image) continue;
    const position = image.meta?.index;
    add({
      section: image.status === 'selected' ? 'selected_candidates' : 'candidates',
      source: image.meta?.provider || 'candidate',
      id: image.id,
      title: image.prompt,
      url: image.url,
      kind: 'candidate',
      view: String(position ?? ''),
      templateId: `candidate_${position ?? image.id}`,
    });
    if (image.meta?.selectedUrl) {
      add({
        section: 'selected',
        source: 'selected_copy',
        id: `${image.id}_selected`,
        title: image.prompt,
        url: image.meta.selectedUrl,
        kind: 'selected',
        view: String(position ?? ''),
        templateId: `selected_${position ?? image.id}`,
      });
    }
  }

  if (session.characterSpec?.cleanReferenceImageUrl) {
    add({
      section: 'anchors',
      source: 'clean_reference',
      id: `${session.id}_clean_anchor`,
      title: session.characterSpec.name || 'clean anchor',
      url: session.characterSpec.cleanReferenceImageUrl,
      kind: 'anchor',
      view: 'clean',
      templateId: 'l1_clean_reference',
    });
  }

  for (const pack of packValues(session.packs)) {
    for (const asset of packValues(pack?.assets)) {
      if (!asset) continue;
      add({
        section: `pack_${asset.kind || pack.kind || 'unknown'}`,
        source: 'pack_asset',
        id: asset.id || asset.assetId,
        title: asset.title,
        url: asset.url,
        kind: asset.kind || pack.kind,
        view: asset.view,
        templateId: asset.templateId,
      });
    }
  }

  return assets;
}

/**
 * Create all symlinks under `<dataDir>/named/`.
 *
 * @param {object[]} allAssets - Every indexed asset record.
 * @param {object[]} sessionSummaries - Summaries carrying `id` and `aliasFolder`.
 * @param {string[]} unreferencedFiles - Paths relative to the data directory.
 * @returns {object[]} One record per alias that was created or skipped.
 */
function writeAliases(allAssets, sessionSummaries, unreferencedFiles) {
  const aliases = [];
  const takenPaths = new Set();
  const namedDir = path.join(dataDir, 'named');
  fs.mkdirSync(namedDir, { recursive: true });

  // One lookup table instead of scanning the summaries for every asset.
  const folderBySession = new Map();
  for (const summary of sessionSummaries) {
    if (!folderBySession.has(summary.id)) folderBySession.set(summary.id, summary.aliasFolder);
  }

  for (const asset of allAssets) {
    const folder = folderBySession.get(asset.sessionId) || asset.sessionId;
    const alias = ensureAlias(asset, path.join(namedDir, folder), takenPaths);
    if (alias) aliases.push({ ...alias, sessionId: asset.sessionId, assetId: asset.id });
  }

  const unindexedDir = path.join(namedDir, '_unindexed');
  fs.mkdirSync(unindexedDir, { recursive: true });
  for (const file of unreferencedFiles) {
    const aliasPath = uniqueAliasPath(unindexedDir, unindexedAliasName(file), takenPaths);
    const target = path.relative(unindexedDir, path.join(dataDir, file));
    const record = { path: path.relative(dataDir, aliasPath), target: file };
    if (replaceSymlink(aliasPath, target)) aliases.push({ ...record, section: 'unindexed' });
    else aliases.push({ ...record, skipped: 'existing non-symlink' });
  }

  return aliases;
}

/**
 * Render the Markdown summary of an index.
 *
 * @param {object} index - Object with `generatedAt`, `dataDir`, `totals`,
 *   `sessions`, `unreferencedFiles` and `missingFiles`.
 * @returns {string}
 */
function renderMarkdown(index) {
  const problemLines = [
    ...index.missingFiles.map(
      file => `- \`${file}\` (missing: referenced by a session but not found on disk)`,
    ),
    ...index.unreferencedFiles.map(file => `- \`${file}\``),
  ];
  return [
    '# Resource Index',
    '',
    `Generated at: ${index.generatedAt}`,
    `Data directory: ${index.dataDir}`,
    '',
    `- Sessions: ${index.totals.sessions}`,
    `- Assets in sessions: ${index.totals.assets}`,
    `- Referenced files: ${index.totals.referencedFiles}`,
    `- Unreferenced files: ${index.totals.unreferencedFiles}`,
    `- Named aliases: ${index.totals.aliases}`,
    '',
    '## Sessions',
    '',
    ...index.sessions.flatMap(session => [
      `### ${session.index}. ${session.characterName || session.prompt || session.id}`,
      '',
      `- id: \`${session.id}\``,
      `- mode: \`${session.inputMode || 'unknown'}\``,
      `- alias folder: \`data/named/${session.aliasFolder}/\``,
      `- assets: ${session.assetCount}`,
      `- packs: ${session.packCount}`,
      '',
    ]),
    '## Missing Or Unindexed Files',
    '',
    ...(problemLines.length ? problemLines : ['None']),
    '',
  ].join('\n');
}

/**
 * Build the index, create the aliases and write both index files.
 */
function main() {
  const allAssets = [];
  const sessionSummaries = [];

  loadSessions().forEach(({ file, data: session }, position) => {
    const assets = collectSessionAssets(session);
    for (const asset of assets) allAssets.push(asset);
    sessionSummaries.push({
      index: position + 1,
      id: session.id,
      file: path.relative(dataDir, file),
      aliasFolder: sessionSlug(session, position),
      prompt: session.prompt || '',
      inputMode: session.inputMode || null,
      createdAt: session.createdAt || null,
      characterName: session.characterSpec?.name || null,
      uploadedCount: session.uploadedImages?.length || 0,
      candidateCount: session.images?.length || 0,
      packCount: packValues(session.packs).length,
      assetCount: assets.length,
    });
  });

  const referenced = new Set(allAssets.map(asset => asset.path).filter(Boolean));
  const unreferencedFiles = listFiles(dataDir)
    .map(file => path.relative(dataDir, file))
    .filter(file => !file.startsWith(`named${path.sep}`))
    .filter(file => !file.startsWith(`sessions${path.sep}`))
    .filter(file => !IGNORED_TOP_LEVEL_FILES.has(file))
    .filter(file => !referenced.has(file));
  // Referenced by a session but absent on disk; listed once per path.
  const missingFiles = [
    ...new Set(allAssets.filter(asset => asset.path && !asset.exists).map(asset => asset.path)),
  ];

  const aliases = makeAliases ? writeAliases(allAssets, sessionSummaries, unreferencedFiles) : [];

  const index = {
    generatedAt,
    dataDir,
    totals: {
      sessions: sessionSummaries.length,
      assets: allAssets.length,
      referencedFiles: referenced.size,
      unreferencedFiles: unreferencedFiles.length,
      aliases: aliases.length,
      missingFiles: missingFiles.length,
    },
    sessions: sessionSummaries,
    assets: allAssets,
    unreferencedFiles,
    aliases,
    missingFiles,
  };

  // With --no-aliases nothing has created the data directory yet.
  fs.mkdirSync(dataDir, { recursive: true });
  const jsonPath = path.join(dataDir, 'resource-index.json');
  const markdownPath = path.join(dataDir, 'resource-index.md');
  fs.writeFileSync(jsonPath, `${JSON.stringify(index, null, 2)}\n`);
  fs.writeFileSync(markdownPath, renderMarkdown(index));

  console.log(`Resource index written to ${jsonPath}`);
  console.log(`Markdown index written to ${markdownPath}`);
  if (makeAliases) console.log(`Named aliases written under ${path.join(dataDir, 'named')}`);
}

main();