/**
 * Grabs captcha images from the dianxiaomi.com home page and runs two OCR
 * engines on each one (ddddocr via `ocr_captcha.py`, and tesseract) so their
 * accuracy can be compared by inspecting the saved samples.
 *
 * Screenshots are written to the `captcha-samples` directory next to this script.
 */
import { chromium } from 'playwright';
import { mkdirSync } from 'fs';
import { execFileSync, execSync } from 'child_process';
import path from 'path';
import { fileURLToPath } from 'url';

const __dirname = path.dirname(fileURLToPath(import.meta.url));
const DIR = path.join(__dirname, 'captcha-samples');

/** Number of captcha images to capture in one run. */
const SAMPLE_COUNT = 10;

/** Placeholder printed when an OCR command exits non-zero, times out or is missing. */
const OCR_FAILED = 'FAIL';

/** Characters tesseract is allowed to emit (digits and ASCII letters). */
const TESSERACT_WHITELIST = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';

/**
 * Runs the ddddocr helper script on one image.
 *
 * `ocr_captcha.py` is resolved relative to the current working directory,
 * exactly as before, so the script must be started from the directory that
 * contains it.
 *
 * @param {string} imgPath - Absolute path of the captcha screenshot.
 * @returns {string} Trimmed stdout of the helper, or `'FAIL'` on any error.
 */
function runDdddOcr(imgPath) {
  try {
    // execFileSync passes arguments without a shell, so special characters in
    // the path cannot break or inject into the command.
    return execFileSync('python3', ['ocr_captcha.py', imgPath], {
      encoding: 'utf-8',
      timeout: 30000,
    }).trim();
  } catch {
    return OCR_FAILED;
  }
}

/**
 * Runs tesseract on one image.
 *
 * NOTE: the original comment labelled this "scheme 0: grayscale + upscale +
 * threshold", but no such preprocessing is performed here; tesseract is run
 * directly on the raw screenshot.
 *
 * @param {string} imgPath - Absolute path of the captcha screenshot.
 * @returns {string} Recognised text with all whitespace removed, or `'FAIL'` on any error.
 */
function runTesseract(imgPath) {
  try {
    const output = execFileSync(
      'tesseract',
      // --psm 7: treat the image as a single text line.
      [imgPath, 'stdout', '--psm', '7', '-c', `tessedit_char_whitelist=${TESSERACT_WHITELIST}`],
      { encoding: 'utf-8', timeout: 10000 },
    );
    return output.trim().replace(/\s/g, '');
  } catch {
    return OCR_FAILED;
  }
}

/**
 * Opens a fresh browser context, screenshots the captcha element and prints
 * both OCR results. The context is always closed, even when a step throws.
 *
 * @param {import('playwright').Browser} browser - Launched browser instance.
 * @param {number} index - 1-based sample number, used in the file name and log line.
 * @returns {Promise<void>}
 */
async function captureSample(browser, index) {
  const ctx = await browser.newContext({
    viewport: { width: 1920, height: 1080 },
    locale: 'zh-CN',
    userAgent:
      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
  });
  try {
    const page = await ctx.newPage();
    await page.goto('https://www.dianxiaomi.com/home.htm', { waitUntil: 'load', timeout: 30000 });

    // Use the handle returned by waitForSelector instead of re-querying with
    // page.$(), which could return null.
    const captchaEl = await page.waitForSelector('#verifyImgCode', { timeout: 10000 });

    // Best effort: if the image never reports `complete`, still take the
    // screenshot after the fixed delay below rather than aborting the run.
    await page
      .waitForFunction(() => document.getElementById('verifyImgCode')?.complete === true, {
        timeout: 5000,
      })
      .catch(() => {});
    await page.waitForTimeout(1000);

    const imgPath = path.join(DIR, `captcha_${index}.png`);
    await captchaEl.screenshot({ path: imgPath });

    const ddddResult = runDdddOcr(imgPath);
    const tessResult = runTesseract(imgPath);

    console.log(`#${index}: ddddocr="${ddddResult}" tesseract="${tessResult}" -> ${imgPath}`);
  } finally {
    await ctx.close();
  }
}

mkdirSync(DIR, { recursive: true });

const browser = await chromium.launch({ headless: true, args: ['--no-sandbox'] });
try {
  // Samples are taken sequentially, each in its own context.
  for (let i = 1; i <= SAMPLE_COUNT; i++) {
    await captureSample(browser, i);
  }
} finally {
  // Close the browser even if a sample failed.
  await browser.close();
}

console.log('\n>> 完成,请查看 captcha-samples/ 目录');