55 lines
2.0 KiB
JavaScript
55 lines
2.0 KiB
JavaScript
/**
 * Capture 10 captcha images for analyzing OCR accuracy.
 */

import { chromium } from 'playwright';
import { mkdirSync } from 'fs';
import { execFileSync, execSync } from 'child_process';
import path from 'path';
import { fileURLToPath } from 'url';

const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Output directory for the captured captcha images (created next to this script).
const DIR = path.join(__dirname, 'captcha-samples');
mkdirSync(DIR, { recursive: true });

// Number of captcha samples captured per run.
const SAMPLE_COUNT = 10;
const PAGE_URL = 'https://www.dianxiaomi.com/home.htm';
const CAPTCHA_SELECTOR = '#verifyImgCode';
// Characters tesseract is allowed to emit (digits plus ASCII letters).
const TESSERACT_WHITELIST = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';

/**
 * Run an external OCR command and return its trimmed stdout.
 *
 * The command is spawned without a shell (argument array), so the image path
 * is passed verbatim and cannot be misinterpreted by shell quoting rules.
 *
 * @param {string} command - Executable to run, resolved through PATH.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {number} timeout - Maximum run time in milliseconds.
 * @returns {string} Trimmed stdout, or the literal 'FAIL' when the command
 *   cannot be started, exits with a non-zero status, or times out.
 */
function runOcr(command, args, timeout) {
  try {
    return execFileSync(command, args, { encoding: 'utf-8', timeout }).trim();
  } catch (err) {
    // Best-effort: one failing OCR engine must not abort the sampling run,
    // but the reason is reported instead of being silently discarded.
    console.error(`${command} failed: ${err.message}`);
    return 'FAIL';
  }
}

const browser = await chromium.launch({ headless: true, args: ['--no-sandbox'] });

try {
  for (let i = 1; i <= SAMPLE_COUNT; i++) {
    // A fresh context per sample, so no cookies/storage carry over between loads.
    const ctx = await browser.newContext({
      viewport: { width: 1920, height: 1080 },
      locale: 'zh-CN',
      userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
    });

    try {
      const page = await ctx.newPage();

      await page.goto(PAGE_URL, { waitUntil: 'load', timeout: 30000 });
      await page.waitForSelector(CAPTCHA_SELECTOR, { timeout: 10000 });
      try {
        await page.waitForFunction(
          () => document.getElementById('verifyImgCode')?.complete === true,
          { timeout: 5000 },
        );
      } catch {
        // Best-effort: if the image never reports `complete`, fall through and
        // screenshot whatever is rendered after the fixed delay below.
      }
      // Fixed extra delay before taking the screenshot.
      await page.waitForTimeout(1000);

      const captchaEl = await page.$(CAPTCHA_SELECTOR);
      if (captchaEl === null) {
        throw new Error(`Captcha element ${CAPTCHA_SELECTOR} disappeared before the screenshot`);
      }
      const imgPath = path.join(DIR, `captcha_${i}.png`);
      await captchaEl.screenshot({ path: imgPath });

      // ddddocr
      // NOTE(review): 'ocr_captcha.py' is resolved relative to the current
      // working directory, not to this script's directory — confirm intended.
      const ddddResult = runOcr('python3', ['ocr_captcha.py', imgPath], 30000);

      // tesseract. The original note labels this "scheme 0: grayscale + upscale +
      // threshold", but no such preprocessing happens in this script: the raw
      // screenshot is passed to tesseract as a single text line (--psm 7).
      const tessOutput = runOcr(
        'tesseract',
        [imgPath, 'stdout', '--psm', '7', '-c', `tessedit_char_whitelist=${TESSERACT_WHITELIST}`],
        10000,
      );
      // Drop any whitespace tesseract emits inside or around the recognized text.
      const tessResult = tessOutput.replace(/\s/g, '');

      console.log(`#${i}: ddddocr="${ddddResult}" tesseract="${tessResult}" -> ${imgPath}`);
    } finally {
      // Always release the context, even when navigation or the screenshot throws.
      await ctx.close();
    }
  }
} finally {
  // Always shut the browser down so a failed run does not leave it running.
  await browser.close();
}

console.log('\n>> 完成,请查看 captcha-samples/ 目录');