init repo

This commit is contained in:
2026-04-25 21:50:03 +08:00
commit ada92373c2
124 changed files with 5292 additions and 0 deletions

54
grab-captchas.mjs Normal file
View File

@@ -0,0 +1,54 @@
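/**
* Grab 10 captcha images from the dianxiaomi.com login page and run each one
* through ddddocr and tesseract, so the OCR accuracy of both can be compared.
*/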
import { chromium } from 'playwright';
import { mkdirSync } from 'fs';
import { execFileSync, execSync } from 'child_process';
import path from 'path';
import { fileURLToPath } from 'url';
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Screenshots are written next to this script, in ./captcha-samples.
const DIR = path.join(__dirname, 'captcha-samples');
mkdirSync(DIR, { recursive: true });

// Number of captcha samples to capture.
const SAMPLE_COUNT = 10;
// Characters tesseract is allowed to emit (digits plus ASCII letters).
const TESSERACT_WHITELIST = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';

/**
 * Run an external OCR command and return its trimmed stdout.
 *
 * The command is spawned without a shell (argument array), so special
 * characters in file paths cannot break or alter the command line.
 *
 * @param {string} command - Executable to run.
 * @param {string[]} args - Arguments passed verbatim to the executable.
 * @param {number} timeoutMs - Kill the process after this many milliseconds.
 * @returns {string} Trimmed stdout, or the literal 'FAIL' if the command
 *   could not be run, exited non-zero, or timed out (deliberate best-effort:
 *   one failing OCR engine must not abort the sampling run).
 */
function runOcr(command, args, timeoutMs) {
  try {
    return execFileSync(command, args, { encoding: 'utf-8', timeout: timeoutMs }).trim();
  } catch {
    return 'FAIL';
  }
}

const browser = await chromium.launch({ headless: true, args: ['--no-sandbox'] });
try {
  for (let i = 1; i <= SAMPLE_COUNT; i++) {
    // A fresh context per sample, so no state is shared between iterations.
    const ctx = await browser.newContext({
      viewport: { width: 1920, height: 1080 },
      locale: 'zh-CN',
      userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
    });
    try {
      const page = await ctx.newPage();
      await page.goto('https://www.dianxiaomi.com/home.htm', { waitUntil: 'load', timeout: 30000 });
      await page.waitForSelector('#verifyImgCode', { timeout: 10000 });
      // Best-effort wait for the image to finish loading; a timeout here is
      // ignored on purpose and the fixed delay below acts as the fallback.
      await page
        .waitForFunction(() => document.getElementById('verifyImgCode')?.complete === true, { timeout: 5000 })
        .catch(() => {});
      await page.waitForTimeout(1000);

      const el = await page.$('#verifyImgCode');
      if (!el) {
        throw new Error('#verifyImgCode disappeared from the page before it could be captured');
      }
      const imgPath = path.join(DIR, `captcha_${i}.png`);
      await el.screenshot({ path: imgPath });

      // ddddocr, via the Python helper script.
      // NOTE(review): 'ocr_captcha.py' is resolved against the current working
      // directory, not this script's directory — run from the folder holding it.
      const ddddResult = runOcr('python3', ['ocr_captcha.py', imgPath], 30000);

      // tesseract, single-text-line mode (--psm 7) with an alphanumeric whitelist.
      // The raw screenshot is passed as-is: no grayscale/upscale/threshold
      // preprocessing is performed here.
      const tessResult = runOcr(
        'tesseract',
        [imgPath, 'stdout', '--psm', '7', '-c', `tessedit_char_whitelist=${TESSERACT_WHITELIST}`],
        10000,
      ).replace(/\s/g, '');

      console.log(`#${i}: ddddocr="${ddddResult}" tesseract="${tessResult}" -> ${imgPath}`);
    } finally {
      // Always release the context, even if navigation or capture failed.
      await ctx.close();
    }
  }
} finally {
  // Always shut the browser down so no Chromium process is left behind.
  await browser.close();
}
console.log('\n>> 完成,请查看 captcha-samples/ 目录');