init: Lightpanda 源码解析站
基于 lightpanda-io/browser@e6cffae 的 12 段深度解析, 单文件 HTML + sticky nav + 架构图 + 代码引用。
This commit is contained in:
896
index.html
Normal file
896
index.html
Normal file
@@ -0,0 +1,896 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>Lightpanda 源码深度解析 · Zig 无头浏览器</title>
|
||||
<style>
|
||||
:root{
|
||||
--bg:#0a0a0f; --bg2:#12121a; --bg3:#1a1a25; --bg4:#222233;
|
||||
--border:#2a2a3a; --border2:#3a3a4a;
|
||||
--text:#e4e4ec; --text2:#a0a0b2; --text3:#6e6e82;
|
||||
--accent:#ffb547; --accent2:#4ecdc4; --accent3:#ff6b9d; --accent4:#b197fc;
|
||||
--zig:#f7a41d; --rust:#ce422b; --v8:#4285f4; --curl:#3c8ec7;
|
||||
--green:#4ecdc4; --red:#ff6b6b; --orange:#ffa94d; --yellow:#ffd93d;
|
||||
--code-bg:#0d1117; --code-border:#21262d;
|
||||
}
|
||||
*{margin:0;padding:0;box-sizing:border-box}
|
||||
/* Smooth in-page anchor scrolling, offset so targets clear the fixed top nav.
   Users who request reduced motion get instant jumps instead. */
html{scroll-behavior:smooth;scroll-padding-top:80px}
@media(prefers-reduced-motion:reduce){html{scroll-behavior:auto}}
|
||||
body{
|
||||
font-family:-apple-system,'SF Pro Text','Helvetica Neue','PingFang SC',sans-serif;
|
||||
background:var(--bg);color:var(--text);line-height:1.75;
|
||||
font-feature-settings:"ss01","cv11";
|
||||
}
|
||||
::selection{background:var(--accent);color:#000}
|
||||
::-webkit-scrollbar{width:8px;height:8px}
|
||||
::-webkit-scrollbar-track{background:var(--bg)}
|
||||
::-webkit-scrollbar-thumb{background:var(--border2);border-radius:4px}
|
||||
::-webkit-scrollbar-thumb:hover{background:var(--text3)}
|
||||
|
||||
/* ===== Top Nav ===== */
|
||||
.topnav{
|
||||
position:fixed;top:0;left:0;right:0;z-index:100;
|
||||
background:rgba(10,10,15,.85);-webkit-backdrop-filter:blur(12px);backdrop-filter:blur(12px); /* prefixed form keeps the blur on Safari < 18 */
|
||||
border-bottom:1px solid var(--border);
|
||||
padding:.85rem 2rem;display:flex;align-items:center;gap:2rem;
|
||||
}
|
||||
.topnav .brand{font-weight:700;font-size:1rem;color:var(--accent);white-space:nowrap}
|
||||
.topnav .brand .dot{display:inline-block;width:8px;height:8px;background:var(--accent);border-radius:50%;margin-right:.5rem;box-shadow:0 0 12px var(--accent)}
|
||||
.topnav nav{display:flex;gap:.25rem;flex-wrap:wrap;overflow-x:auto}
|
||||
.topnav nav a{
|
||||
color:var(--text2);text-decoration:none;font-size:.82rem;
|
||||
padding:.35rem .7rem;border-radius:6px;white-space:nowrap;transition:.15s;
|
||||
}
|
||||
.topnav nav a:hover{color:var(--accent);background:var(--bg3)}
|
||||
.topnav .right{margin-left:auto;display:flex;gap:.75rem;align-items:center}
|
||||
.topnav .right a{
|
||||
color:var(--text2);text-decoration:none;font-size:.82rem;
|
||||
padding:.35rem .75rem;border:1px solid var(--border2);border-radius:6px;transition:.15s;
|
||||
}
|
||||
.topnav .right a:hover{color:var(--accent);border-color:var(--accent)}
|
||||
|
||||
/* ===== Hero ===== */
|
||||
.hero{
|
||||
min-height:92vh;display:flex;flex-direction:column;justify-content:center;align-items:center;
|
||||
padding:6rem 2rem 4rem;text-align:center;position:relative;overflow:hidden;
|
||||
}
|
||||
.hero::before{
|
||||
content:"";position:absolute;inset:0;
|
||||
background:
|
||||
radial-gradient(800px circle at 20% 30%,rgba(255,181,71,.08),transparent 60%),
|
||||
radial-gradient(600px circle at 80% 70%,rgba(78,205,196,.06),transparent 60%);
|
||||
pointer-events:none;
|
||||
}
|
||||
.hero-inner{position:relative;z-index:2;max-width:960px}
|
||||
.hero .tag{
|
||||
display:inline-block;padding:.35rem .9rem;border:1px solid var(--accent);
|
||||
border-radius:100px;color:var(--accent);font-size:.78rem;font-weight:600;
|
||||
letter-spacing:.1em;text-transform:uppercase;margin-bottom:2rem;
|
||||
}
|
||||
.hero h1{
|
||||
font-size:clamp(2.5rem,6vw,4.5rem);font-weight:800;letter-spacing:-.03em;
|
||||
background:linear-gradient(135deg,#fff 0%,var(--accent) 60%,var(--accent3) 100%);
|
||||
-webkit-background-clip:text;background-clip:text;-webkit-text-fill-color:transparent; /* gradient text: standard property alongside the prefixed one */
|
||||
margin-bottom:1rem;line-height:1.05;
|
||||
}
|
||||
.hero .sub{
|
||||
font-size:clamp(1rem,1.8vw,1.25rem);color:var(--text2);
|
||||
max-width:720px;margin:0 auto 2.5rem;
|
||||
}
|
||||
.hero .sub code{
|
||||
background:var(--bg3);padding:.15rem .45rem;border-radius:4px;
|
||||
color:var(--accent);font-size:.95em;border:1px solid var(--border);
|
||||
}
|
||||
.hero .stats{
|
||||
display:grid;grid-template-columns:repeat(auto-fit,minmax(160px,1fr));gap:1rem;
|
||||
max-width:760px;margin:0 auto 2.5rem;
|
||||
}
|
||||
.hero .stat{
|
||||
background:var(--bg2);border:1px solid var(--border);border-radius:12px;
|
||||
padding:1.25rem .5rem;
|
||||
}
|
||||
.hero .stat .num{font-size:2.2rem;font-weight:800;color:var(--accent);line-height:1}
|
||||
.hero .stat .label{font-size:.78rem;color:var(--text2);margin-top:.4rem;text-transform:uppercase;letter-spacing:.06em}
|
||||
.hero .meta{
|
||||
display:flex;justify-content:center;gap:.75rem;flex-wrap:wrap;font-size:.78rem;color:var(--text3);
|
||||
}
|
||||
.hero .meta span{padding:.25rem .65rem;background:var(--bg2);border:1px solid var(--border);border-radius:4px}
|
||||
|
||||
/* ===== Verdict Banner ===== */
|
||||
.verdict{
|
||||
max-width:1200px;margin:-2rem auto 4rem;padding:0 2rem;
|
||||
}
|
||||
.verdict-box{
|
||||
background:linear-gradient(135deg,rgba(78,205,196,.08),rgba(255,181,71,.05));
|
||||
border:1px solid var(--accent2);border-radius:16px;padding:2rem 2.5rem;
|
||||
display:grid;grid-template-columns:auto 1fr;gap:2rem;align-items:center;
|
||||
}
|
||||
.verdict-box .icon{font-size:3rem}
|
||||
.verdict-box h3{color:var(--accent2);font-size:1.3rem;margin-bottom:.5rem}
|
||||
.verdict-box p{color:var(--text2);font-size:.95rem}
|
||||
.verdict-box strong{color:var(--text)}
|
||||
@media(max-width:700px){.verdict-box{grid-template-columns:1fr;text-align:center}}
|
||||
|
||||
/* ===== Section ===== */
|
||||
section{max-width:1200px;margin:0 auto;padding:4rem 2rem;scroll-margin-top:80px}
|
||||
.section-head{margin-bottom:2.5rem}
|
||||
.section-num{
|
||||
display:inline-block;font-family:'SF Mono',Menlo,monospace;color:var(--accent);
|
||||
font-size:.85rem;letter-spacing:.1em;margin-bottom:.5rem;
|
||||
}
|
||||
.section-head h2{
|
||||
font-size:clamp(1.8rem,3.5vw,2.5rem);font-weight:800;letter-spacing:-.02em;
|
||||
background:linear-gradient(135deg,#fff,var(--accent));
|
||||
-webkit-background-clip:text;background-clip:text;-webkit-text-fill-color:transparent; /* gradient text: standard property alongside the prefixed one */
|
||||
margin-bottom:.5rem;
|
||||
}
|
||||
.section-head .lead{color:var(--text2);font-size:1.05rem;max-width:760px}
|
||||
|
||||
/* ===== Cards ===== */
|
||||
/* Two-up card grid. min(380px,100%) lets the track shrink below 380px so a
   narrow container gets one full-width column instead of overflowing. */
.grid-2{display:grid;grid-template-columns:repeat(auto-fit,minmax(min(380px,100%),1fr));gap:1.25rem}
|
||||
/* Three-up card grid. min(280px,100%) prevents horizontal overflow when the
   container is narrower than the 280px track minimum. */
.grid-3{display:grid;grid-template-columns:repeat(auto-fit,minmax(min(280px,100%),1fr));gap:1rem}
|
||||
.card{
|
||||
background:var(--bg2);border:1px solid var(--border);border-radius:12px;
|
||||
padding:1.5rem 1.75rem;transition:.2s;
|
||||
}
|
||||
.card:hover{border-color:var(--border2);transform:translateY(-2px)}
|
||||
.card h4{color:var(--accent);font-size:1rem;margin-bottom:.75rem;display:flex;align-items:center;gap:.5rem}
|
||||
.card h4::before{content:"›";color:var(--accent2);font-weight:700}
|
||||
.card p{color:var(--text2);font-size:.9rem;margin-bottom:.5rem}
|
||||
.card ul{list-style:none;margin-top:.5rem}
|
||||
.card li{
|
||||
color:var(--text2);font-size:.85rem;padding:.25rem 0 .25rem 1rem;position:relative;
|
||||
}
|
||||
.card li::before{content:"·";position:absolute;left:.25rem;color:var(--accent)}
|
||||
.card li strong{color:var(--text)}
|
||||
|
||||
/* ===== Code Reference Chip ===== */
|
||||
.ref{
|
||||
display:inline-block;font-family:'SF Mono',Menlo,Consolas,monospace;
|
||||
font-size:.78em;background:var(--code-bg);color:var(--accent2);
|
||||
padding:.12rem .45rem;border:1px solid var(--code-border);border-radius:4px;
|
||||
white-space:nowrap;margin:0 .1rem;
|
||||
}
|
||||
.ref.rust{color:var(--rust);border-color:rgba(206,66,43,.3)}
|
||||
.ref.zig{color:var(--zig);border-color:rgba(247,164,29,.3)}
|
||||
|
||||
/* ===== Code Block ===== */
|
||||
pre{
|
||||
background:var(--code-bg);border:1px solid var(--code-border);border-radius:10px;
|
||||
padding:1.25rem 1.5rem;overflow-x:auto;font-size:.83rem;line-height:1.6;
|
||||
margin:1rem 0;
|
||||
}
|
||||
pre code{font-family:'SF Mono',Menlo,monospace;color:#e6edf3}
|
||||
.tok-k{color:#ff7b72}.tok-s{color:#a5d6ff}.tok-n{color:#d2a8ff}.tok-c{color:#8b949e;font-style:italic}
|
||||
.tok-f{color:#d2a8ff}.tok-t{color:#ffa657}
|
||||
|
||||
/* ===== Table ===== */
|
||||
table{
|
||||
width:100%;border-collapse:collapse;margin:1rem 0;
|
||||
background:var(--bg2);border-radius:10px;overflow:hidden;border:1px solid var(--border);
|
||||
}
|
||||
th,td{padding:.85rem 1.1rem;text-align:left;border-bottom:1px solid var(--border);font-size:.88rem}
|
||||
th{background:var(--bg3);color:var(--accent);font-weight:600;font-size:.82rem;text-transform:uppercase;letter-spacing:.05em}
|
||||
td{color:var(--text2)}
|
||||
td strong{color:var(--text)}
|
||||
tr:last-child td{border-bottom:none}
|
||||
tr:hover td{background:var(--bg3)}
|
||||
|
||||
/* ===== Arch diagram ===== */
|
||||
.arch{
|
||||
background:var(--bg2);border:1px solid var(--border);border-radius:12px;
|
||||
padding:2rem;margin:1.5rem 0;
|
||||
}
|
||||
.arch-layer{
|
||||
background:var(--bg3);border:1px solid var(--border2);border-radius:8px;
|
||||
padding:1rem 1.25rem;margin-bottom:.75rem;
|
||||
display:flex;align-items:center;gap:1rem;
|
||||
}
|
||||
.arch-layer .ln{
|
||||
width:110px;font-size:.72rem;text-transform:uppercase;letter-spacing:.08em;
|
||||
color:var(--text3);font-weight:600;
|
||||
}
|
||||
.arch-layer .val{flex:1;color:var(--text);font-size:.9rem}
|
||||
.arch-layer .tech{
|
||||
font-family:'SF Mono',Menlo,monospace;font-size:.78rem;color:var(--accent2);
|
||||
}
|
||||
.arch-layer.cdp{border-left:3px solid var(--accent)}
|
||||
.arch-layer.v8{border-left:3px solid var(--v8)}
|
||||
.arch-layer.rust{border-left:3px solid var(--rust)}
|
||||
.arch-layer.curl{border-left:3px solid var(--curl)}
|
||||
.arch-layer.zig{border-left:3px solid var(--zig)}
|
||||
|
||||
/* ===== Highlights ===== */
|
||||
.hl{
|
||||
background:linear-gradient(90deg,rgba(255,181,71,.08),transparent);
|
||||
border-left:3px solid var(--accent);padding:1rem 1.25rem;border-radius:0 6px 6px 0;
|
||||
margin:1.25rem 0;color:var(--text2);font-size:.92rem;
|
||||
}
|
||||
.hl strong{color:var(--text)}
|
||||
.hl.warn{border-left-color:var(--orange);background:linear-gradient(90deg,rgba(255,169,77,.08),transparent)}
|
||||
.hl.warn strong{color:var(--orange)}
|
||||
.hl.crit{border-left-color:var(--red);background:linear-gradient(90deg,rgba(255,107,107,.08),transparent)}
|
||||
.hl.crit strong{color:var(--red)}
|
||||
.hl.ok{border-left-color:var(--green);background:linear-gradient(90deg,rgba(78,205,196,.08),transparent)}
|
||||
.hl.ok strong{color:var(--green)}
|
||||
|
||||
.tag-list{display:flex;flex-wrap:wrap;gap:.4rem;margin-top:.75rem}
|
||||
.tag-list .t{
|
||||
padding:.2rem .6rem;background:var(--bg3);border:1px solid var(--border);
|
||||
border-radius:100px;font-size:.74rem;color:var(--text2);font-family:'SF Mono',monospace;
|
||||
}
|
||||
.tag-list .t.impl{border-color:var(--green);color:var(--green)}
|
||||
.tag-list .t.stub{border-color:var(--orange);color:var(--orange)}
|
||||
.tag-list .t.miss{border-color:var(--red);color:var(--red)}
|
||||
|
||||
/* ===== Footer ===== */
|
||||
footer{
|
||||
margin-top:6rem;padding:3rem 2rem 2rem;text-align:center;
|
||||
border-top:1px solid var(--border);background:var(--bg2);
|
||||
}
|
||||
footer p{color:var(--text3);font-size:.85rem;margin:.25rem 0}
|
||||
footer a{color:var(--accent2);text-decoration:none}
|
||||
footer a:hover{text-decoration:underline}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="topnav">
|
||||
<div class="brand"><span class="dot"></span>Lightpanda 解析</div>
|
||||
<nav>
|
||||
<a href="#verdict">结论</a>
|
||||
<a href="#s1">入口</a>
|
||||
<a href="#s2">CDP</a>
|
||||
<a href="#s3">浏览器核心</a>
|
||||
<a href="#s4">HTML 解析</a>
|
||||
<a href="#s5">V8 绑定</a>
|
||||
<a href="#s6">Web API</a>
|
||||
<a href="#s7">网络</a>
|
||||
<a href="#s8">MCP</a>
|
||||
<a href="#s9">构建</a>
|
||||
<a href="#s10">限制</a>
|
||||
</nav>
|
||||
<div class="right">
|
||||
<!-- External link opened in a new tab: rel prevents the target page from reaching window.opener -->
<a href="https://github.com/lightpanda-io/browser" target="_blank" rel="noopener noreferrer">GitHub ↗</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- ========== HERO ========== -->
|
||||
<section class="hero">
|
||||
<div class="hero-inner">
|
||||
<div class="tag">基于 commit e6cffae · 2026-04-13</div>
|
||||
<h1>Lightpanda<br>从源码读懂这只"快 11 倍"的无头熊猫</h1>
|
||||
<p class="sub">
|
||||
全 Zig 手写、非 Chromium 分支的无头浏览器。
|
||||
用 <code>html5ever</code> 解析 HTML,<code>V8</code> 跑 JS,<code>libcurl</code> 跑网络,
|
||||
暴露 CDP + MCP 双协议。<br>
|
||||
这份文档基于 commit <code>e6cffae</code> 的实际源码,带<strong style="color:var(--accent)">文件:行号</strong>引用。
|
||||
</p>
|
||||
<div class="stats">
|
||||
<div class="stat"><div class="num">32</div><div class="label">CDP 域</div></div>
|
||||
<div class="stat"><div class="num">217</div><div class="label">Web API 文件</div></div>
|
||||
<div class="stat"><div class="num">20+</div><div class="label">MCP 工具</div></div>
|
||||
<div class="stat"><div class="num">11×</div><div class="label">相对 Chrome</div></div>
|
||||
</div>
|
||||
<div class="meta">
|
||||
<span>Zig</span><span>Rust (html5ever)</span><span>C (libcurl/BoringSSL)</span>
|
||||
<span>V8</span><span>AGPL-3.0</span><span>~9 MB 源码</span>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<!-- ========== VERDICT ========== -->
|
||||
<div id="verdict" class="verdict">
|
||||
<div class="verdict-box">
|
||||
<!-- Decorative emoji: hidden from assistive tech so it is not announced before the heading -->
<div class="icon" aria-hidden="true">🎯</div>
|
||||
<div>
|
||||
<h3>不是噱头,但有明确边界</h3>
|
||||
<p>
|
||||
<strong>11× 是真的</strong>——代价是<strong>完全砍掉了 CSS 布局、像素渲染、Canvas 绘制</strong>。
|
||||
Lightpanda 只保留 DOM 正确性、JS 兼容性、网络保真度、语义提取四件事。
|
||||
用它做<strong>批量抓取、SSR 测试、AI Agent 驱动</strong>非常合适;
|
||||
用它做<strong>需要截图、视觉校验、重 SPA</strong>的场景会翻车。
|
||||
AGPL-3.0 license 对商用 SaaS 也要留意。
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- ========== S1: Entry ========== -->
|
||||
<section id="s1">
|
||||
<div class="section-head">
|
||||
<div class="section-num">§ 01 / ENTRY POINT</div>
|
||||
<h2>入口与进程模型</h2>
|
||||
<p class="lead">一个 main,三种模式:CDP 服务器、单次 fetch、MCP stdio。所有模式共用同一套 Browser 实例。</p>
|
||||
</div>
|
||||
|
||||
<div class="arch">
|
||||
<div class="arch-layer zig">
|
||||
<div class="ln">main.zig</div>
|
||||
<div class="val">解析 CLI → 装 allocator → 绑 SIGTERM/SIGINT → 分派模式</div>
|
||||
<div class="tech">main.zig:34-170</div>
|
||||
</div>
|
||||
<div class="arch-layer cdp">
|
||||
<div class="ln">serve mode</div>
|
||||
<div class="val">bind 127.0.0.1:9222 → Server.init() → 事件循环</div>
|
||||
<div class="tech">main.zig:92-118</div>
|
||||
</div>
|
||||
<div class="arch-layer">
|
||||
<div class="ln">fetch mode</div>
|
||||
<div class="val">worker 线程跑 lp.fetch(),支持 --wait-until / --dump / --wait-selector</div>
|
||||
<div class="tech">main.zig:119-145</div>
|
||||
</div>
|
||||
<div class="arch-layer">
|
||||
<div class="ln">mcp mode</div>
|
||||
<div class="val">读 stdin 写 stdout,mcp.router.processRequests()</div>
|
||||
<div class="tech">main.zig:147-167</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="grid-2">
|
||||
<div class="card">
|
||||
<h4>内存策略</h4>
|
||||
<ul>
|
||||
<li>Debug 构建用 <strong>GeneralPurposeAllocator</strong> 查泄漏 <span class="ref">main.zig:34-44</span></li>
|
||||
<li>Release 构建用 C allocator,主线程外包 Arena</li>
|
||||
<li>分 tiny/small/large 三档 Arena 池 <span class="ref">App.zig:40</span></li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="card">
|
||||
<h4>默认参数</h4>
|
||||
<ul>
|
||||
<li>Bind: <strong>127.0.0.1:9222</strong>(默认只绑环回)</li>
|
||||
<li>Inactivity timeout: <strong>10s</strong>(1~604800)</li>
|
||||
<li>CDP 消息上限: <strong>512 KB</strong> <span class="ref">Config.zig:42</span></li>
|
||||
<li>HTTP 超时: <strong>5000 ms</strong> <span class="ref">Config.zig:115</span></li>
|
||||
<li>UA 基线: <code>Lightpanda/1.0</code> <span class="ref">Config.zig:325-339</span></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<!-- ========== S2: CDP ========== -->
|
||||
<section id="s2">
|
||||
<div class="section-head">
|
||||
<div class="section-num">§ 02 / CDP SERVER</div>
|
||||
<h2>CDP 协议层</h2>
|
||||
<p class="lead">Chrome DevTools Protocol 的 Zig 实现。兼容 Puppeteer/Playwright,但只实现了一个子集。路由用 bit-cast 加速。</p>
|
||||
</div>
|
||||
|
||||
<div class="hl">
|
||||
<strong>巧思</strong>:<span class="ref">cdp/CDP.zig:200-262</span> 的分派器按域名字符数分组,
|
||||
对每一组用 <code>@bitCast(domain[0..N].*)</code> 把字符串整体转成整数,再在整数上 <code>switch</code>,
|
||||
省掉字符串比较,O(1) 路由。
|
||||
</div>
|
||||
|
||||
<pre><code><span class="tok-c">// 伪代码示意(实际在 CDP.zig:200-262)</span>
|
||||
<span class="tok-k">switch</span> (domain.len) {
|
||||
<span class="tok-n">2</span> => <span class="tok-k">switch</span> (@bitCast(u16, domain[0..2].*)) { <span class="tok-s">"LP"</span> => ... },
|
||||
<span class="tok-n">3</span> => <span class="tok-k">switch</span> (@bitCast(u24, domain[0..3].*)) { <span class="tok-s">"DOM"</span>, <span class="tok-s">"Log"</span>, <span class="tok-s">"CSS"</span> => ... },
|
||||
<span class="tok-n">4</span> => ..., <span class="tok-n">5</span> => ..., <span class="tok-n">6</span> => ..., <span class="tok-n">7</span> => ...,
|
||||
}
|
||||
</code></pre>
|
||||
|
||||
<h3 style="color:var(--text);margin:2rem 0 1rem">已实现的 CDP 域</h3>
|
||||
<div class="grid-3">
|
||||
<div class="card"><h4>Target</h4><p>目标发现/附加</p><span class="ref">cdp/domains/target.zig</span></div>
|
||||
<div class="card"><h4>Page</h4><p>导航、脚本求值、生命周期</p><span class="ref">cdp/domains/page.zig</span></div>
|
||||
<div class="card"><h4>DOM</h4><p>树查询、quads、节点搜索</p><span class="ref">cdp/domains/dom.zig</span></div>
|
||||
<div class="card"><h4>Runtime</h4><p>JS 求值、属性读取、调用栈</p><span class="ref">cdp/domains/runtime.zig</span></div>
|
||||
<div class="card"><h4>Network</h4><p>网络事件、请求/响应捕获</p><span class="ref">cdp/domains/network.zig</span></div>
|
||||
<div class="card"><h4>Fetch</h4><p>请求拦截、响应改写</p><span class="ref">cdp/domains/fetch.zig</span></div>
|
||||
<div class="card"><h4>Input</h4><p>鼠标、键盘、触摸</p><span class="ref">cdp/domains/input.zig</span></div>
|
||||
<div class="card"><h4>Log</h4><p>Console 日志</p><span class="ref">cdp/domains/log.zig</span></div>
|
||||
<div class="card"><h4>Storage</h4><p>Cookie、LocalStorage</p><span class="ref">cdp/domains/storage.zig</span></div>
|
||||
<div class="card"><h4>CSS</h4><p>规则读取(部分)</p><span class="ref">cdp/domains/css.zig</span></div>
|
||||
<div class="card"><h4>Browser</h4><p>版本、窗口边界(多为桩)</p><span class="ref">cdp/domains/browser.zig</span></div>
|
||||
<div class="card"><h4>Inspector</h4><p>状态(桩)</p><span class="ref">cdp/domains/inspector.zig</span></div>
|
||||
<div class="card"><h4>Security</h4><p>安全信息</p><span class="ref">cdp/domains/security.zig</span></div>
|
||||
<div class="card"><h4>Emulation</h4><p>设备模拟(桩——无布局)</p><span class="ref">cdp/domains/emulation.zig</span></div>
|
||||
<div class="card"><h4>Accessibility</h4><p>AXNode 可访问性树</p><span class="ref">cdp/domains/accessibility.zig</span></div>
|
||||
<div class="card"><h4>Performance</h4><p>性能指标</p><span class="ref">cdp/domains/performance.zig</span></div>
|
||||
<div class="card"><h4>LP (自扩)</h4><p>Markdown dump 等</p><span class="ref">cdp/domains/lp.zig</span></div>
|
||||
</div>
|
||||
|
||||
<div class="hl warn" style="margin-top:2rem">
|
||||
<strong>关键约束</strong>:<span class="ref">cdp/CDP.zig:270-281</span> 强制只允许一个 BrowserContext 同时存在——
|
||||
Lightpanda 任何时刻只有一个活动上下文、一个 Session、一个活动 Page。
|
||||
<strong>不支持多页面并发</strong>。这是为了简化状态管理,但比 Chrome 的标签架构弱。
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<!-- ========== S3: Browser Core ========== -->
|
||||
<section id="s3">
|
||||
<div class="section-head">
|
||||
<div class="section-num">§ 03 / BROWSER CORE</div>
|
||||
<h2>浏览器核心三件套</h2>
|
||||
<p class="lead">Browser / Session / Page 三层结构,加上 Runner 事件循环。</p>
|
||||
</div>
|
||||
|
||||
<div class="arch">
|
||||
<div class="arch-layer cdp">
|
||||
<div class="ln">Browser</div>
|
||||
<div class="val">持有 V8 环境,创建 Session</div>
|
||||
<div class="tech">Browser.zig:43-119</div>
|
||||
</div>
|
||||
<div class="arch-layer">
|
||||
<div class="ln"> └─ Session</div>
|
||||
<div class="val">cookie jar / history / origins / Page 生命周期</div>
|
||||
<div class="tech">Session.zig:109-187</div>
|
||||
</div>
|
||||
<div class="arch-layer">
|
||||
<div class="ln"> └─ Page</div>
|
||||
<div class="val">DOM 树 + frame 层级(iframes)</div>
|
||||
<div class="tech">Page.zig:200-600+</div>
|
||||
</div>
|
||||
<div class="arch-layer zig">
|
||||
<div class="ln">Runner</div>
|
||||
<div class="val">事件循环: tick HttpClient → runMacrotasks → runMicrotasks</div>
|
||||
<div class="tech">Runner.zig:57-150</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="grid-2">
|
||||
<div class="card">
|
||||
<h4>导航流程</h4>
|
||||
<ul>
|
||||
<li>Client 调 <code>Page.navigate(url)</code>(CDP 或直接 API)</li>
|
||||
<li>Page 入队导航</li>
|
||||
<li><code>Runner.wait()</code> 驱动事件循环直到满足等待条件</li>
|
||||
<li>HTTP 进度 → Parser → JS 任务队列 → 微任务</li>
|
||||
<li>Page 发出 <code>load</code> / <code>DOMContentLoaded</code> / <code>networkidle</code></li>
|
||||
<li>通知 CDP 的 <code>Page.navigated</code></li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="card">
|
||||
<h4>Arena 策略</h4>
|
||||
<p>Session 持有两个 Arena:</p>
|
||||
<ul>
|
||||
<li><strong>page_arena</strong> — 每次导航重建,销毁快</li>
|
||||
<li><strong>arena</strong> — Session 生命周期</li>
|
||||
</ul>
|
||||
<p style="margin-top:.75rem">
|
||||
<span class="ref">Session.zig:116-117</span>
|
||||
代价:无跨页面复用,但换来干净快速的 teardown
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<!-- ========== S4: HTML Parsing ========== -->
|
||||
<section id="s4">
|
||||
<div class="section-head">
|
||||
<div class="section-num">§ 04 / HTML PARSING</div>
|
||||
<h2>HTML 解析 · Zig ↔ Rust FFI</h2>
|
||||
<p class="lead">直接用 Mozilla 的 html5ever(Rust 写的 HTML5 spec 实现),通过 C FFI 被 Zig 调用。</p>
|
||||
</div>
|
||||
|
||||
<div class="hl ok">
|
||||
<strong>为什么不自己写?</strong> html5ever 是 Servo 项目(源自 Mozilla)的产线级 HTML parser,
|
||||
实现了 adoption agency、template 内容模型、script insertion point 等所有奇葩边缘情况。
|
||||
从零复刻一个正确的 HTML parser 是多人年级别的工程,Lightpanda 直接用就对了。
|
||||
</div>
|
||||
|
||||
<div class="grid-2">
|
||||
<div class="card">
|
||||
<h4>构建集成</h4>
|
||||
<ul>
|
||||
<li><span class="ref">build.zig:235-265</span> 跑 <code>cargo build</code> 编译 <span class="ref rust">src/html5ever/Cargo.toml</span></li>
|
||||
<li>产出 <code>liblitefetch_html5ever.a</code></li>
|
||||
<li>Zig 侧 <code>mod.addObjectFile(obj)</code> 静态链入</li>
|
||||
<li>Cargo.toml + lib.rs 作为 input 追踪,变化自动重编</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="card">
|
||||
<h4>C API(Zig 侧声明)</h4>
|
||||
<ul>
|
||||
<li><code>html5ever_parse_document()</code></li>
|
||||
<li><code>html5ever_parse_document_with_encoding()</code> — charset aware</li>
|
||||
<li><code>html5ever_parse_fragment()</code> — innerHTML</li>
|
||||
<li><code>html5ever_streaming_parser_create/feed/finish()</code></li>
|
||||
<li><span class="ref zig">browser/parser/html5ever.zig:21-90</span></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<h3 style="color:var(--text);margin:2rem 0 1rem">Rust → Zig 回调列表</h3>
|
||||
<pre><code><span class="tok-c">// browser/parser/html5ever.zig:21-40</span>
|
||||
<span class="tok-k">extern fn</span> <span class="tok-f">createElementCallback</span>(ctx: *Parser, tag: *const u8, attrs: *const Attr, n: usize) *Node;
|
||||
<span class="tok-k">extern fn</span> <span class="tok-f">appendCallback</span>(ctx: *Parser, parent: *Node, child: *Node) void;
|
||||
<span class="tok-k">extern fn</span> <span class="tok-f">popCallback</span>(ctx: *Parser, node: *Node) void;
|
||||
<span class="tok-k">extern fn</span> <span class="tok-f">createCommentCallback</span>(ctx: *Parser, text: *const u8, len: usize) *Node;
|
||||
<span class="tok-k">extern fn</span> <span class="tok-f">createProcessingInstruction</span>(ctx: *Parser, target: [], data: []) *Node;
|
||||
<span class="tok-k">extern fn</span> <span class="tok-f">appendDoctypeToDocument</span>(ctx: *Parser, name: [], publicId: [], systemId: []) void;
|
||||
<span class="tok-k">extern fn</span> <span class="tok-f">getTemplateContentsCallback</span>(ctx: *Parser, node: *Node) *Node;
|
||||
<span class="tok-k">extern fn</span> <span class="tok-f">reparentChildrenCallback</span>(ctx: *Parser, old: *Node, new: *Node) void;
|
||||
<span class="tok-k">extern fn</span> <span class="tok-f">addAttrsIfMissingCallback</span>(ctx: *Parser, node: *Node, attrs: []Attr) void;
|
||||
</code></pre>
|
||||
|
||||
<p style="color:var(--text2);margin:1rem 0">
|
||||
每个回调在 Zig 端实现为 <code>callconv(.c) fn (ctx: *Parser, ...)</code>
|
||||
<span class="ref">Parser.zig:42-176</span>。
|
||||
<code>ParsedNode</code> 包装 <code>Node*</code> 加可选 element 数据。
|
||||
错误通过 <code>Parser.err</code> union 传递,并关联到对应的源码位置。
|
||||
</p>
|
||||
|
||||
<div class="hl">
|
||||
<strong>DOM 节点表示</strong>(<span class="ref">browser/webapi/Node.zig</span>):tagged union 包含
|
||||
Document、DocumentFragment、Element、Text、Comment、PI、DocumentType、CDATA。
|
||||
Element 带 qualified name、懒创建的 Attribute 节点、双向链表子节点、cached namespace URI。
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<!-- ========== S5: V8 ========== -->
|
||||
<section id="s5">
|
||||
<div class="section-head">
|
||||
<div class="section-num">§ 05 / JS RUNTIME</div>
|
||||
<h2>JS 运行时 · V8 绑定</h2>
|
||||
<p class="lead">用 V8 跑 JS,通过代码生成为每个 WebAPI 类型自动产出绑定。Zig 对象和 V8 对象通过 internal field 指针互映。</p>
|
||||
</div>
|
||||
|
||||
<div class="grid-2">
|
||||
<div class="card">
|
||||
<h4>V8 集成</h4>
|
||||
<ul>
|
||||
<li><span class="ref">build.zig:213-233</span> <code>--prebuilt-v8-path</code> 优先,否则从 <code>zig-v8</code> 包从源码编译(~1 小时)</li>
|
||||
<li><span class="ref zig">browser/js/js.zig:19-72</span> 导出 V8 C API、TypedArray wrapper</li>
|
||||
<li><span class="ref zig">browser/js/Platform.zig</span> 平台初始化,快照加载</li>
|
||||
<li><span class="ref zig">browser/js/Env.zig</span> 每页多 Context(支持同源隔离),Global = Window</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="card">
|
||||
<h4>绑定桥</h4>
|
||||
<p><span class="ref">browser/js/bridge.zig</span> 是代码生成式绑定,对每个 WebAPI 类型自动产出:</p>
|
||||
<ul>
|
||||
<li>Constructor(<code>new Element(...)</code>)</li>
|
||||
<li>Property accessor(getter/setter)</li>
|
||||
<li>Method</li>
|
||||
<li>Callback dispatch</li>
|
||||
</ul>
|
||||
<p style="margin-top:.5rem">JS 对象在 V8 internal field 存 Zig 指针,identity 稳定:同一个 Zig 对象永远映射到同一个 V8 对象。</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<h3 style="color:var(--text);margin:2rem 0 1rem">类型转换规则(Value.zig)</h3>
|
||||
<table>
|
||||
<!-- Column headers: scope="col" ties each data cell to its header for screen readers -->
<thead><tr><th scope="col">Zig 类型</th><th scope="col">V8 类型</th></tr></thead>
|
||||
<tbody>
|
||||
<tr><td><code>bool</code></td><td>Boolean</td></tr>
|
||||
<tr><td><code>i32 / u32 / f64</code></td><td>Number</td></tr>
|
||||
<tr><td><code>[]const u8</code></td><td>String</td></tr>
|
||||
<tr><td><code>*MyType</code></td><td>Object with internal field = 指针</td></tr>
|
||||
<tr><td><code>?T</code></td><td>null / Object</td></tr>
|
||||
<tr><td><code>error!T</code></td><td>JS 异常 / 值</td></tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<div class="hl">
|
||||
<strong>调用回传</strong>(<span class="ref">browser/js/Caller.zig</span>):JS 调 Zig 方法 → 从 V8 internal field 取 Zig 指针 →
|
||||
从 V8 参数数组取参数并转 Zig 类型 → 调 Zig 函数 → 返回值转 V8 value。
|
||||
Zig error 直接 throw 成 JS 异常。
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<!-- ========== S6: Web API ========== -->
|
||||
<section id="s6">
|
||||
<div class="section-head">
|
||||
<div class="section-num">§ 06 / WEB API</div>
|
||||
<h2>Web API 实现矩阵</h2>
|
||||
<p class="lead"><code>src/browser/webapi/</code> 下 <strong>217 个 .zig 文件</strong>。完整、半实现、缺失三档。</p>
|
||||
</div>
|
||||
|
||||
<div class="grid-2">
|
||||
<div class="card" style="border-color:var(--green)">
|
||||
<h4 style="color:var(--green)">✓ 完整实现</h4>
|
||||
<div class="tag-list">
|
||||
<span class="t impl">Node/Element</span>
|
||||
<span class="t impl">Document</span>
|
||||
<span class="t impl">querySelector(All)</span>
|
||||
<span class="t impl">classList</span>
|
||||
<span class="t impl">dataset</span>
|
||||
<span class="t impl">EventTarget</span>
|
||||
<span class="t impl">MouseEvent</span>
|
||||
<span class="t impl">KeyboardEvent</span>
|
||||
<span class="t impl">LocalStorage</span>
|
||||
<span class="t impl">SessionStorage</span>
|
||||
<span class="t impl">Cookie jar</span>
|
||||
<span class="t impl">URL / URLSearchParams</span>
|
||||
<span class="t impl">fetch()</span>
|
||||
<span class="t impl">XMLHttpRequest</span>
|
||||
<span class="t impl">WebSocket</span>
|
||||
<span class="t impl">SubtleCrypto</span>
|
||||
<span class="t impl">FileReader</span>
|
||||
<span class="t impl">Performance</span>
|
||||
<span class="t impl">setTimeout</span>
|
||||
<span class="t impl">MutationObserver</span>
|
||||
<span class="t impl">Navigation/History</span>
|
||||
<span class="t impl">CustomElementRegistry</span>
|
||||
<span class="t impl">ShadowRoot</span>
|
||||
<span class="t impl">Selection/Range</span>
|
||||
</div>
|
||||
</div>
|
||||
<div class="card" style="border-color:var(--orange)">
|
||||
<h4 style="color:var(--orange)">△ 桩实现</h4>
|
||||
<div class="tag-list">
|
||||
<span class="t stub">OffscreenCanvas (blob=空)</span>
|
||||
<span class="t stub">Canvas2D (方法空)</span>
|
||||
<span class="t stub">CSSStyleSheet (解析但不级联)</span>
|
||||
<span class="t stub">IntersectionObserver (永远可见)</span>
|
||||
<span class="t stub">AXNode (部分)</span>
|
||||
<span class="t stub">Emulation (no-op)</span>
|
||||
</div>
|
||||
<p style="margin-top:1rem;font-size:.85rem">
|
||||
<span class="ref">webapi/canvas/OffscreenCanvas.zig:74-77</span>
|
||||
<code>convertToBlob()</code> 返回空 Blob
|
||||
</p>
|
||||
<p style="font-size:.85rem">
|
||||
<span class="ref">webapi/canvas/OffscreenCanvas.zig:80</span>
|
||||
<code>transferToImageBitmap()</code> 返回 null
|
||||
</p>
|
||||
</div>
|
||||
<!-- Full-width card: 1/-1 spans every column the auto-fit grid currently has,
     so a one-column (mobile) layout is not forced into an implicit second track -->
<div class="card" style="border-color:var(--red);grid-column:1/-1">
|
||||
<h4 style="color:var(--red)">✗ 完全缺失</h4>
|
||||
<div class="tag-list">
|
||||
<span class="t miss">CSS 布局引擎</span>
|
||||
<span class="t miss">盒模型 / 级联 / 继承</span>
|
||||
<span class="t miss">Grid / Flex</span>
|
||||
<span class="t miss">像素渲染</span>
|
||||
<span class="t miss">WebGL</span>
|
||||
<span class="t miss">getUserMedia</span>
|
||||
<span class="t miss">WebRTC</span>
|
||||
<span class="t miss">Web Audio</span>
|
||||
<span class="t miss">IndexedDB</span>
|
||||
<span class="t miss">ServiceWorker</span>
|
||||
<span class="t miss">SVG 渲染</span>
|
||||
<span class="t miss">CSS Animations 执行</span>
|
||||
<span class="t miss">@media print</span>
|
||||
<span class="t miss">getComputedStyle (真实值)</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="hl warn" style="margin-top:2rem">
|
||||
<strong>语义警告</strong>:<span class="ref">webapi/IntersectionObserver.zig</span>
|
||||
因为没有布局引擎,<strong>所有元素一律视为完全可见</strong>。
|
||||
任何依赖"滚动到视口才触发加载"的脚本(懒加载、无限列表)都会一次性全触发——
|
||||
这通常对爬虫来说是<em>好事</em>,因为页面一上来就全展开了,但对 A/B 测试脚本是错误行为。
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<!-- ========== S7: Network ========== -->
|
||||
<section id="s7">
|
||||
<div class="section-head">
|
||||
<div class="section-num">§ 07 / NETWORK</div>
|
||||
<h2>网络栈 · libcurl 中心化</h2>
|
||||
<p class="lead">libcurl + BoringSSL + nghttp2 + brotli + zlib。关掉所有非 HTTP 协议。</p>
|
||||
</div>
|
||||
|
||||
<div class="arch">
|
||||
<div class="arch-layer curl"><div class="ln">libcurl</div><div class="val">HTTP/HTTPS/WebSocket(关掉 FTP/IMAP/LDAP)</div><div class="tech">build.zig:439-600+</div></div>
|
||||
<div class="arch-layer"><div class="ln">TLS</div><div class="val">BoringSSL (Google OpenSSL fork)</div><div class="tech">build.zig:296-437</div></div>
|
||||
<div class="arch-layer"><div class="ln">HTTP/2</div><div class="val">nghttp2</div><div class="tech">build.zig:296-437</div></div>
|
||||
<div class="arch-layer"><div class="ln">压缩</div><div class="val">brotli + zlib + zstd</div><div class="tech">build.zig:296-437</div></div>
|
||||
<div class="arch-layer zig"><div class="ln">事件循环</div><div class="val">CurlM multi handle + epoll/kqueue + wakeup pipe</div><div class="tech">network/Network.zig:52-150</div></div>
|
||||
</div>
|
||||
|
||||
<div class="grid-2">
|
||||
<div class="card">
|
||||
<h4>HTTP 客户端</h4>
|
||||
<ul>
|
||||
<li><span class="ref zig">browser/HttpClient.zig</span></li>
|
||||
<li>持久连接池(可配置)</li>
|
||||
<li>并发限流: <code>--http-max-concurrent</code>, <code>--http-max-host-open</code></li>
|
||||
<li>超时: 连接 + 传输分开</li>
|
||||
<li>响应大小上限: <code>--http-max-response-size</code></li>
|
||||
<li>最多 10 次重定向</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="card">
|
||||
<h4>安全相关</h4>
|
||||
<ul>
|
||||
<li><strong>SSRF 防护</strong>: <span class="ref">network/IpFilter.zig</span>
|
||||
<code>--block-private-networks</code> 屏蔽 RFC 1918 + IPv6 ULA,在 DNS 解析后执行</li>
|
||||
<li><strong>自定义 CIDR</strong>: <code>--block-cidrs</code></li>
|
||||
<li><strong>Robots.txt</strong>: <span class="ref">network/Robots.zig</span> <code>--obey-robots</code></li>
|
||||
<li><strong>WebBot Auth</strong>: <span class="ref">network/WebBotAuth.zig</span> Ed25519 签名</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="card">
|
||||
<h4>缓存</h4>
|
||||
<ul>
|
||||
<li><span class="ref">network/cache/Cache.zig</span> + <code>FsCache.zig</code></li>
|
||||
<li>可选文件系统缓存 <code>--http-cache-dir</code></li>
|
||||
<li>尊重 HTTP 缓存头(Cache-Control / ETag / Last-Modified)</li>
|
||||
<li>以 URL + request headers 作为缓存 key</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="card">
|
||||
<h4>请求拦截</h4>
|
||||
<ul>
|
||||
<li><span class="ref">cdp/domains/fetch.zig</span></li>
|
||||
<li>CDP Fetch 域可拦截/改写任何请求</li>
|
||||
<li>Pending transfer 挂在 <code>BrowserContext.intercept_state</code></li>
|
||||
<li>客户端可 abort / allow / mock response</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<!-- ========== S8: MCP ========== -->
|
||||
<section id="s8">
|
||||
<div class="section-head">
|
||||
<div class="section-num">§ 08 / MCP</div>
|
||||
<h2>MCP 集成(独特卖点)</h2>
|
||||
<p class="lead">Model Context Protocol 服务器,面向 Claude / Cursor / Cline 这类 AI Agent 工具。大部分浏览器只暴露 CDP 或 WebDriver,Lightpanda 把 MCP 当一等公民。</p>
|
||||
</div>
|
||||
|
||||
<pre><code><span class="tok-c">// 启动流</span>
|
||||
$ lightpanda mcp [--cdp-port 9223]
|
||||
↓
|
||||
main() 起 mcpThread() <span class="tok-c">// main.zig:179-194</span>
|
||||
↓
|
||||
mcp.Server.init(browser, session, http) <span class="tok-c">// mcp/Server.zig:27-54</span>
|
||||
↓
|
||||
mcp.router.processRequests() <span class="tok-c">// 读 stdin / 写 stdout</span>
|
||||
↓
|
||||
JSON-RPC 2.0 请求 → 路由 → handler → 响应
|
||||
</code></pre>
|
||||
|
||||
<div class="grid-2">
|
||||
<div class="card">
|
||||
<h4>协议方法</h4>
|
||||
<ul>
|
||||
<li><code>initialize</code> — 握手,返回协议版本</li>
|
||||
<li><code>ping</code> — 心跳</li>
|
||||
<li><code>resources/list</code> — 枚举资源</li>
|
||||
<li><code>resources/read</code> — 读资源</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="card">
|
||||
<h4>资源(2 种)</h4>
|
||||
<ul>
|
||||
<li><code>mcp://page/html</code> — 完整序列化 DOM</li>
|
||||
<li><code>mcp://page/markdown</code> — token-efficient Markdown</li>
|
||||
<li><span class="ref">mcp/resources.zig:9-22</span></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<h3 style="color:var(--text);margin:2rem 0 1rem">给 AI Agent 的工具(20+)</h3>
|
||||
<table>
|
||||
<thead><tr><th>类别</th><th>工具</th></tr></thead>
|
||||
<tbody>
|
||||
<tr><td><strong>导航</strong></td><td><code>goto(url, timeout, waitUntil)</code>, <code>navigate()</code></td></tr>
|
||||
<tr><td><strong>提取</strong></td><td><code>markdown(url)</code>, <code>links(url)</code>, <code>semantic_tree(url, maxDepth)</code>, <code>interactiveElements(url)</code>, <code>structuredData(url)</code>, <code>detectForms(url)</code></td></tr>
|
||||
<tr><td><strong>交互</strong></td><td><code>click(backendNodeId)</code>, <code>fill(backendNodeId, text)</code>, <code>hover</code>, <code>press(key)</code>, <code>scroll(x, y)</code></td></tr>
|
||||
<tr><td><strong>检查</strong></td><td><code>nodeDetails(backendNodeId)</code> — tag/role/name/interactivity/value/href/checked/options</td></tr>
|
||||
<tr><td><strong>JS</strong></td><td><code>evaluate(script, url, timeout, waitUntil)</code>, <code>eval()</code></td></tr>
|
||||
<tr><td><strong>等待</strong></td><td><code>waitForSelector(selector, timeout)</code></td></tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p style="color:var(--text3);font-size:.85rem;margin-top:.5rem"><span class="ref">mcp/tools.zig:48-300+</span></p>
|
||||
|
||||
<div class="hl ok">
|
||||
<strong>为什么有价值?</strong> 用 Puppeteer 驱动 Chrome 时,AI 要学 CDP 协议 + 自己管 nodeId。
|
||||
用 Lightpanda MCP,Claude 直接用 <code>click(5)</code> / <code>fill(3, "hello")</code> 这类语义化 tool call,
|
||||
<strong>不需要学协议细节</strong>。<code>markdown(url)</code> 还把 DOM 降维成对 LLM 友好、省 token 的 Markdown 文本。
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<!-- ========== S9: Build ========== -->
|
||||
<section id="s9">
|
||||
<div class="section-head">
|
||||
<div class="section-num">§ 09 / BUILD</div>
|
||||
<h2>构建系统</h2>
|
||||
<p class="lead"><code>build.zig</code> 34 KB,非平凡。把 V8 / Rust crate / libcurl + 依赖链全串起来。</p>
|
||||
</div>
|
||||
|
||||
<div class="grid-2">
|
||||
<div class="card">
|
||||
<h4>外部依赖(build.zig.zon)</h4>
|
||||
<ul>
|
||||
<li><strong>v8</strong> — 使用预编译产物或从源码编译(源码编译约 1 小时)</li>
|
||||
<li><strong>curl</strong> — 加 boringssl + nghttp2 + brotli + zlib + zstd</li>
|
||||
<li><strong>html5ever</strong> — Rust crate,仓内自带</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="card">
|
||||
<h4>产物</h4>
|
||||
<ul>
|
||||
<li><code>lightpanda</code> — 主程序</li>
|
||||
<li><code>lightpanda-snapshot-creator</code> — V8 snapshot 生成</li>
|
||||
<li><code>legacy_test</code> — 集成测试 runner</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="card">
|
||||
<h4>主要步骤</h4>
|
||||
<ul>
|
||||
<li><span class="ref">build.zig:213-233</span> V8 链接 + ASAN/TSAN 选项</li>
|
||||
<li><span class="ref">build.zig:235-265</span> 跑 cargo build 编译 html5ever → <code>.a</code> → <code>addObjectFile</code></li>
|
||||
<li><span class="ref">build.zig:439-600+</span> 编译 libcurl(HTTP/2 + WebSocket + HTTPS + IPv6,关掉 FTP/IMAP/LDAP)</li>
|
||||
<li><span class="ref">build.zig:296-437</span> 编译 zlib/brotli/nghttp2/BoringSSL</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="card">
|
||||
<h4>常用命令</h4>
|
||||
<pre style="margin:0"><code>zig build <span class="tok-c"># 编译</span>
|
||||
zig build test <span class="tok-c"># 测试</span>
|
||||
zig build fmt <span class="tok-c"># 格式化</span>
|
||||
zig build <span class="tok-t">-Doptimize</span>=<span class="tok-n">ReleaseSafe</span></code></pre>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<!-- ========== S10: Limits ========== -->
|
||||
<section id="s10">
|
||||
<div class="section-head">
|
||||
<div class="section-num">§ 10 / LIMITS &amp; TRADE-OFFS</div>
|
||||
<h2>已知限制与架构权衡</h2>
|
||||
<p class="lead">从代码里读出来的真实状况,不看营销。TODOs、桩方法、unreachable 都是一手线索。</p>
|
||||
</div>
|
||||
|
||||
<h3 style="color:var(--text);margin:0 0 1rem">有意为之的设计取舍</h3>
|
||||
<div class="grid-2">
|
||||
<div class="card">
|
||||
<h4>零渲染</h4>
|
||||
<p>Canvas 桩、CSS 解析但不布局、无盒模型、无计算样式。<br>
|
||||
<strong>代价</strong>:截图、视觉校验、依赖元素位置的反爬全部失效。<br>
|
||||
<strong>收益</strong>:这正是 11× 速度优势的来源。</p>
|
||||
</div>
|
||||
<div class="card">
|
||||
<h4>无布局引擎</h4>
|
||||
<p><code>IntersectionObserver</code> 永远"可见",媒体查询被忽略,@font-face 注册但不加载。<br>
|
||||
<strong>影响</strong>:懒加载一次全展开(爬虫友好),但 A/B 脚本可能出错。</p>
|
||||
</div>
|
||||
<div class="card">
|
||||
<h4>单页上下文</h4>
|
||||
<p><span class="ref">cdp/CDP.zig:270-281</span> 同一时刻只有一个 BrowserContext、一个 Session、一个活动 Page。<br>
|
||||
<strong>影响</strong>:没有真正的多标签并发,需要并发时必须多进程。</p>
|
||||
</div>
|
||||
<div class="card">
|
||||
<h4>Fetch body 常驻内存</h4>
|
||||
<p><span class="ref">cdp/CDP.zig:384</span> 捕获的响应 body 不流到磁盘。<br>
|
||||
<strong>影响</strong>:抓巨型文件会内存爆炸,典型页面无忧。</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<h3 style="color:var(--text);margin:2.5rem 0 1rem">代码里的 TODO 线索</h3>
|
||||
<table>
|
||||
<thead><tr><th>位置</th><th>TODO / 桩</th></tr></thead>
|
||||
<tbody>
|
||||
<tr><td><span class="ref">cdp/domains/page.zig</span></td><td>缺 <code>transitionType</code>、<code>referrerPolicy</code> 枚举</td></tr>
|
||||
<tr><td><span class="ref">cdp/domains/dom.zig</span></td><td>quads 即使元素应隐藏也照填</td></tr>
|
||||
<tr><td><span class="ref">cdp/domains/network.zig</span></td><td>子 frame 没进 <code>Network.getCertificateDetails</code></td></tr>
|
||||
<tr><td><span class="ref">cdp/domains/fetch.zig</span></td><td>跨页面请求回复可能跨 context 泄漏</td></tr>
|
||||
<tr><td><span class="ref">cdp/domains/emulation.zig</span></td><td>Device emulation 是 no-op(本来就没布局)</td></tr>
|
||||
<tr><td><span class="ref">cdp/domains/browser.zig</span></td><td>窗口尺寸硬编码</td></tr>
|
||||
<tr><td><span class="ref">cdp/AXNode.zig</span></td><td>Accessibility tree 在 label_element / label_wrap 有 TODO</td></tr>
|
||||
<tr><td><span class="ref">webapi/selector/Parser.zig</span></td><td>复杂选择器(如 <code>:has()</code>)可能只是桩实现</td></tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<h3 style="color:var(--text);margin:2.5rem 0 1rem">最终判断表</h3>
|
||||
<table>
|
||||
<thead><tr><th>场景</th><th>Lightpanda</th><th>说明</th></tr></thead>
|
||||
<tbody>
|
||||
<tr><td>批量抓静态/半动态页面</td><td style="color:var(--green)">✓ 强烈推荐</td><td>内存占用约为 Chrome 的 1/9</td></tr>
|
||||
<tr><td>SSR 测试</td><td style="color:var(--green)">✓ 合适</td><td>DOM 正确性为一等公民</td></tr>
|
||||
<tr><td>给 AI Agent 当"浏览器臂"</td><td style="color:var(--green)">✓ 原生支持</td><td>MCP 一等公民,20+ 语义工具</td></tr>
|
||||
<tr><td>跑 Playwright/Puppeteer 脚本</td><td style="color:var(--accent2)">△ 大部分能跑</td><td>兼容 CDP,但不支持需要截图/布局的 API</td></tr>
|
||||
<tr><td>需要截图或像素校验</td><td style="color:var(--red)">✗ 不行</td><td>没有渲染管线</td></tr>
|
||||
<tr><td>重 SPA(依赖可见性懒加载)</td><td style="color:var(--orange)">△ 语义偏差</td><td>所有元素"可见",懒加载一次全触发</td></tr>
|
||||
<tr><td>WebRTC / WebGL / ServiceWorker</td><td style="color:var(--red)">✗ 不行</td><td>全部未实现</td></tr>
|
||||
<tr><td>多标签并发</td><td style="color:var(--red)">✗ 不行</td><td>单 BrowserContext 约束</td></tr>
|
||||
<tr><td>公司 SaaS 后端(AGPL)</td><td style="color:var(--orange)">△ 注意</td><td>若修改源码并以网络服务形式对外提供,可能触发源码披露义务</td></tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</section>
|
||||
|
||||
<footer>
|
||||
<p><strong>Lightpanda 源码深度解析</strong> · based on <code>lightpanda-io/browser@e6cffae</code></p>
|
||||
<p>读源码日期 2026-04-13 · 作者 kang · 部署 <a href="https://lightpanda-docs.kang-kang.com">lightpanda-docs.kang-kang.com</a></p>
|
||||
<p style="margin-top:1rem"><a href="https://github.com/lightpanda-io/browser" target="_blank">GitHub</a> · <a href="https://lightpanda.io" target="_blank">lightpanda.io</a></p>
|
||||
</footer>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
Reference in New Issue
Block a user