<!DOCTYPE html>
|
||
<html lang="zh-CN">
|
||
<head>
|
||
<meta charset="UTF-8">
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||
<title>ARES 源码解析 · withmartian/ares RL Agent 训练框架</title>
|
||
<style>
|
||
* { margin: 0; padding: 0; box-sizing: border-box; }
|
||
:root {
|
||
--bg: #0b0d11;
|
||
--bg2: #11141a;
|
||
--panel: #161a22;
|
||
--panel2: #1c2029;
|
||
--border: #232933;
|
||
--border2: #2a3039;
|
||
--text: #d7dae0;
|
||
--muted: #8b919c;
|
||
--dim: #5d6370;
|
||
--blue: #60a5fa;
|
||
--violet: #a78bfa;
|
||
--cyan: #22d3ee;
|
||
--green: #34d399;
|
||
--yellow: #fbbf24;
|
||
--red: #f87171;
|
||
--pink: #f472b6;
|
||
--orange: #fb923c;
|
||
}
|
||
/* Smooth scrolling for in-page anchor jumps, applied only when the user has
   not requested reduced motion at the OS/browser level. */
@media (prefers-reduced-motion: no-preference) {
  html { scroll-behavior: smooth; }
}
|
||
body {
|
||
font-family: "Inter", -apple-system, BlinkMacSystemFont, "Segoe UI", "Helvetica Neue", "PingFang SC", "Hiragino Sans GB", sans-serif;
|
||
background: var(--bg);
|
||
color: var(--text);
|
||
line-height: 1.7;
|
||
font-size: 15px;
|
||
min-height: 100vh;
|
||
}
|
||
code, pre, .mono { font-family: "JetBrains Mono", "SF Mono", Menlo, Consolas, monospace; }
|
||
|
||
.layout {
|
||
display: grid;
|
||
grid-template-columns: 260px 1fr;
|
||
min-height: 100vh;
|
||
}
|
||
aside {
|
||
position: sticky;
|
||
top: 0;
|
||
align-self: start;
|
||
height: 100vh;
|
||
background: linear-gradient(180deg, #0c0f14 0%, #0b0d11 100%);
|
||
border-right: 1px solid var(--border);
|
||
padding: 1.75rem 1.25rem;
|
||
overflow-y: auto;
|
||
}
|
||
main {
|
||
padding: 3rem 4rem 8rem;
|
||
max-width: 1100px;
|
||
}
|
||
@media (max-width: 960px) {
|
||
.layout { grid-template-columns: 1fr; }
|
||
aside { display: none; }
|
||
main { padding: 1.5rem 1.25rem 5rem; }
|
||
}
|
||
|
||
/* Sidebar title, drawn as gradient-filled text. */
.brand {
  font-weight: 800;
  font-size: 1.1rem;
  background: linear-gradient(135deg, var(--blue), var(--violet));
  /* Prefixed declaration kept for older engines; the standard property
     follows it so the gradient is clipped to the glyphs either way. */
  -webkit-background-clip: text;
  background-clip: text;
  -webkit-text-fill-color: transparent;
  margin-bottom: 0.35rem;
}
|
||
.brand-sub {
|
||
font-size: 0.72rem;
|
||
color: var(--dim);
|
||
margin-bottom: 1.5rem;
|
||
letter-spacing: 0.05em;
|
||
text-transform: uppercase;
|
||
}
|
||
nav ul { list-style: none; }
|
||
nav li { margin-bottom: 0.15rem; }
|
||
nav a {
|
||
display: block;
|
||
padding: 0.4rem 0.65rem;
|
||
color: var(--muted);
|
||
text-decoration: none;
|
||
font-size: 0.86rem;
|
||
border-radius: 6px;
|
||
border-left: 2px solid transparent;
|
||
transition: all 0.15s ease;
|
||
}
|
||
nav a:hover { color: var(--text); background: rgba(255,255,255,0.03); }
|
||
nav a.active {
|
||
color: var(--blue);
|
||
background: rgba(96,165,250,0.08);
|
||
border-left-color: var(--blue);
|
||
}
|
||
nav .group-title {
|
||
font-size: 0.68rem;
|
||
color: var(--dim);
|
||
letter-spacing: 0.1em;
|
||
text-transform: uppercase;
|
||
margin: 1.1rem 0.65rem 0.4rem;
|
||
font-weight: 700;
|
||
}
|
||
.side-meta {
|
||
margin-top: 1.5rem;
|
||
padding-top: 1.1rem;
|
||
border-top: 1px solid var(--border);
|
||
font-size: 0.72rem;
|
||
color: var(--dim);
|
||
line-height: 1.8;
|
||
}
|
||
.side-meta a { color: var(--blue); text-decoration: none; }
|
||
.side-meta code { background: var(--panel); padding: 0.08rem 0.35rem; border-radius: 3px; font-size: 0.72rem; color: var(--cyan); }
|
||
|
||
.hero { margin-bottom: 4rem; }
|
||
.hero-badge {
|
||
display: inline-flex;
|
||
align-items: center;
|
||
gap: 0.4rem;
|
||
padding: 0.3rem 0.75rem;
|
||
background: rgba(96,165,250,0.08);
|
||
border: 1px solid rgba(96,165,250,0.25);
|
||
border-radius: 999px;
|
||
font-size: 0.75rem;
|
||
color: var(--blue);
|
||
margin-bottom: 1.2rem;
|
||
letter-spacing: 0.03em;
|
||
}
|
||
.hero-badge .dot { width: 6px; height: 6px; background: var(--blue); border-radius: 50%; }
|
||
/* Page title, drawn as gradient-filled text. */
.hero h1 {
  font-size: 3rem;
  font-weight: 800;
  letter-spacing: -0.02em;
  line-height: 1.1;
  margin-bottom: 0.75rem;
  background: linear-gradient(135deg, #e0e5ef 0%, var(--blue) 40%, var(--violet) 80%);
  /* Prefixed declaration kept for older engines; the standard property
     follows it so the gradient is clipped to the glyphs either way. */
  -webkit-background-clip: text;
  background-clip: text;
  -webkit-text-fill-color: transparent;
}
|
||
.hero-tagline {
|
||
font-size: 1.15rem;
|
||
color: var(--muted);
|
||
margin-bottom: 2rem;
|
||
max-width: 720px;
|
||
}
|
||
.hero-tagline strong { color: var(--text); font-weight: 600; }
|
||
|
||
.stats {
|
||
display: grid;
|
||
grid-template-columns: repeat(auto-fit, minmax(160px, 1fr));
|
||
gap: 1rem;
|
||
margin-bottom: 2rem;
|
||
}
|
||
.stat {
|
||
background: var(--panel);
|
||
border: 1px solid var(--border);
|
||
border-radius: 10px;
|
||
padding: 1rem 1.2rem;
|
||
}
|
||
.stat-value {
|
||
font-size: 1.6rem;
|
||
font-weight: 700;
|
||
color: var(--text);
|
||
line-height: 1;
|
||
margin-bottom: 0.35rem;
|
||
}
|
||
.stat-value.blue { color: var(--blue); }
|
||
.stat-value.violet { color: var(--violet); }
|
||
.stat-value.green { color: var(--green); }
|
||
.stat-value.cyan { color: var(--cyan); }
|
||
.stat-value.yellow { color: var(--yellow); }
|
||
.stat-label {
|
||
font-size: 0.75rem;
|
||
color: var(--muted);
|
||
letter-spacing: 0.03em;
|
||
}
|
||
|
||
.hero-meta {
|
||
display: flex;
|
||
flex-wrap: wrap;
|
||
gap: 1.5rem;
|
||
font-size: 0.82rem;
|
||
color: var(--muted);
|
||
}
|
||
.hero-meta strong { color: var(--text); }
|
||
.hero-meta a { color: var(--cyan); text-decoration: none; }
|
||
.hero-meta a:hover { text-decoration: underline; }
|
||
|
||
section { margin-bottom: 4.5rem; scroll-margin-top: 1rem; }
|
||
h2 {
|
||
font-size: 1.7rem;
|
||
font-weight: 700;
|
||
margin-bottom: 0.4rem;
|
||
letter-spacing: -0.01em;
|
||
color: #e8ecf3;
|
||
}
|
||
h2 .sec-num {
|
||
display: inline-block;
|
||
color: var(--dim);
|
||
font-size: 0.85rem;
|
||
font-weight: 400;
|
||
margin-right: 0.6rem;
|
||
vertical-align: middle;
|
||
}
|
||
h3 {
|
||
font-size: 1.15rem;
|
||
font-weight: 600;
|
||
margin: 1.8rem 0 0.75rem;
|
||
color: var(--blue);
|
||
letter-spacing: -0.01em;
|
||
}
|
||
h4 {
|
||
font-size: 0.95rem;
|
||
font-weight: 600;
|
||
margin: 1.3rem 0 0.5rem;
|
||
color: var(--cyan);
|
||
}
|
||
.sec-lead {
|
||
color: var(--muted);
|
||
margin-bottom: 1.5rem;
|
||
font-size: 0.95rem;
|
||
max-width: 780px;
|
||
}
|
||
|
||
p { margin-bottom: 0.9rem; color: var(--text); }
|
||
ul, ol { margin: 0.5rem 0 1.2rem 1.3rem; color: var(--text); }
|
||
li { margin-bottom: 0.4rem; line-height: 1.7; }
|
||
li > strong { color: var(--text); font-weight: 600; }
|
||
|
||
code {
|
||
background: rgba(96,165,250,0.1);
|
||
color: var(--cyan);
|
||
padding: 0.1rem 0.4rem;
|
||
border-radius: 4px;
|
||
font-size: 0.82em;
|
||
border: 1px solid rgba(96,165,250,0.15);
|
||
}
|
||
|
||
pre {
|
||
background: #0a0d13;
|
||
border: 1px solid var(--border);
|
||
border-radius: 10px;
|
||
padding: 1.1rem 1.25rem;
|
||
overflow-x: auto;
|
||
margin: 1rem 0 1.5rem;
|
||
font-size: 0.82rem;
|
||
line-height: 1.65;
|
||
}
|
||
pre code {
|
||
background: transparent;
|
||
border: none;
|
||
color: #c6cdd9;
|
||
padding: 0;
|
||
font-size: inherit;
|
||
}
|
||
pre .kw { color: var(--violet); }
|
||
pre .str { color: var(--green); }
|
||
pre .num { color: var(--orange); }
|
||
pre .com { color: var(--dim); font-style: italic; }
|
||
pre .fn { color: var(--yellow); }
|
||
pre .cls { color: var(--cyan); }
|
||
pre .var { color: var(--pink); }
|
||
|
||
blockquote {
|
||
border-left: 3px solid var(--blue);
|
||
background: rgba(96,165,250,0.05);
|
||
padding: 0.9rem 1.2rem;
|
||
border-radius: 0 8px 8px 0;
|
||
margin: 1.1rem 0;
|
||
color: var(--muted);
|
||
font-size: 0.9rem;
|
||
}
|
||
|
||
table {
|
||
width: 100%;
|
||
border-collapse: collapse;
|
||
margin: 1rem 0 1.5rem;
|
||
font-size: 0.85rem;
|
||
background: var(--panel);
|
||
border-radius: 8px;
|
||
overflow: hidden;
|
||
border: 1px solid var(--border);
|
||
}
|
||
th {
|
||
text-align: left;
|
||
padding: 0.7rem 1rem;
|
||
background: var(--panel2);
|
||
color: var(--muted);
|
||
font-weight: 600;
|
||
font-size: 0.78rem;
|
||
letter-spacing: 0.03em;
|
||
text-transform: uppercase;
|
||
border-bottom: 1px solid var(--border2);
|
||
}
|
||
td {
|
||
padding: 0.75rem 1rem;
|
||
border-bottom: 1px solid var(--border);
|
||
vertical-align: top;
|
||
}
|
||
tr:last-child td { border-bottom: none; }
|
||
tr:hover td { background: rgba(255,255,255,0.02); }
|
||
|
||
.file {
|
||
display: inline-block;
|
||
font-family: "JetBrains Mono", monospace;
|
||
font-size: 0.78rem;
|
||
color: var(--violet);
|
||
background: rgba(167,139,250,0.08);
|
||
padding: 0.05rem 0.4rem;
|
||
border-radius: 4px;
|
||
border: 1px solid rgba(167,139,250,0.2);
|
||
}
|
||
|
||
.card-grid {
|
||
display: grid;
|
||
grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
|
||
gap: 1rem;
|
||
margin: 1rem 0 1.5rem;
|
||
}
|
||
.card {
|
||
background: var(--panel);
|
||
border: 1px solid var(--border);
|
||
border-radius: 10px;
|
||
padding: 1.2rem 1.35rem;
|
||
transition: border-color 0.15s;
|
||
}
|
||
.card:hover { border-color: var(--border2); }
|
||
.card h4 { margin-top: 0; color: var(--blue); font-size: 0.95rem; }
|
||
.card p { font-size: 0.85rem; color: var(--muted); margin-bottom: 0.4rem; }
|
||
.card .tag {
|
||
display: inline-block;
|
||
font-size: 0.7rem;
|
||
padding: 0.15rem 0.5rem;
|
||
border-radius: 3px;
|
||
letter-spacing: 0.03em;
|
||
margin-right: 0.3rem;
|
||
}
|
||
.tag-blue { background: rgba(96,165,250,0.12); color: var(--blue); border: 1px solid rgba(96,165,250,0.25); }
|
||
.tag-violet { background: rgba(167,139,250,0.12); color: var(--violet); border: 1px solid rgba(167,139,250,0.25); }
|
||
.tag-green { background: rgba(52,211,153,0.12); color: var(--green); border: 1px solid rgba(52,211,153,0.25); }
|
||
.tag-yellow { background: rgba(251,191,36,0.12); color: var(--yellow); border: 1px solid rgba(251,191,36,0.25); }
|
||
.tag-red { background: rgba(248,113,113,0.12); color: var(--red); border: 1px solid rgba(248,113,113,0.25); }
|
||
|
||
.ascii {
|
||
background: #08090d;
|
||
border: 1px solid var(--border);
|
||
border-radius: 10px;
|
||
padding: 1.5rem 1.2rem;
|
||
font-family: "JetBrains Mono", monospace;
|
||
font-size: 0.76rem;
|
||
line-height: 1.55;
|
||
white-space: pre;
|
||
overflow-x: auto;
|
||
color: #a8b0bd;
|
||
margin: 1.2rem 0;
|
||
}
|
||
.ascii .hl-blue { color: var(--blue); }
|
||
.ascii .hl-violet { color: var(--violet); }
|
||
.ascii .hl-green { color: var(--green); }
|
||
.ascii .hl-cyan { color: var(--cyan); }
|
||
.ascii .hl-yellow { color: var(--yellow); }
|
||
.ascii .hl-red { color: var(--red); }
|
||
.ascii .hl-dim { color: var(--dim); }
|
||
|
||
.highlight-box {
|
||
background: linear-gradient(135deg, rgba(96,165,250,0.08), rgba(167,139,250,0.08));
|
||
border: 1px solid rgba(96,165,250,0.25);
|
||
border-radius: 12px;
|
||
padding: 1.5rem 1.75rem;
|
||
margin: 1.5rem 0;
|
||
}
|
||
.highlight-box h4 {
|
||
margin-top: 0;
|
||
color: var(--blue);
|
||
font-size: 1rem;
|
||
}
|
||
.highlight-box p:last-child { margin-bottom: 0; }
|
||
|
||
.callout {
|
||
border-left: 3px solid var(--yellow);
|
||
background: rgba(251,191,36,0.06);
|
||
padding: 0.9rem 1.2rem;
|
||
border-radius: 0 8px 8px 0;
|
||
margin: 1rem 0;
|
||
font-size: 0.88rem;
|
||
}
|
||
.callout strong { color: var(--yellow); }
|
||
.callout.red {
|
||
border-left-color: var(--red);
|
||
background: rgba(248,113,113,0.05);
|
||
}
|
||
.callout.red strong { color: var(--red); }
|
||
.callout.green {
|
||
border-left-color: var(--green);
|
||
background: rgba(52,211,153,0.05);
|
||
}
|
||
.callout.green strong { color: var(--green); }
|
||
|
||
.compare {
|
||
display: grid;
|
||
grid-template-columns: 1fr 1fr;
|
||
gap: 1rem;
|
||
margin: 1rem 0 1.5rem;
|
||
}
|
||
@media (max-width: 700px) { .compare { grid-template-columns: 1fr; } }
|
||
.compare-col {
|
||
background: var(--panel);
|
||
border: 1px solid var(--border);
|
||
border-radius: 10px;
|
||
padding: 1.25rem;
|
||
}
|
||
.compare-col h5 {
|
||
font-size: 0.92rem;
|
||
margin-bottom: 0.5rem;
|
||
font-weight: 600;
|
||
}
|
||
.compare-col.a h5 { color: var(--blue); }
|
||
.compare-col.b h5 { color: var(--violet); }
|
||
.compare-col ul { margin: 0.4rem 0 0 1.2rem; font-size: 0.85rem; }
|
||
|
||
footer {
|
||
margin-top: 5rem;
|
||
padding-top: 2rem;
|
||
border-top: 1px solid var(--border);
|
||
font-size: 0.8rem;
|
||
color: var(--dim);
|
||
}
|
||
footer a { color: var(--cyan); text-decoration: none; }
|
||
</style>
|
||
</head>
|
||
<body>
|
||
|
||
<div class="layout">
|
||
|
||
<aside>
|
||
<div class="brand">ARES 源码解析</div>
|
||
<div class="brand-sub">withmartian · 2026-04-24</div>
|
||
|
||
<nav>
|
||
<div class="group-title">一、定位</div>
|
||
<ul>
|
||
<li><a href="#tldr">TL;DR</a></li>
|
||
<li><a href="#positioning">生态位</a></li>
|
||
<li><a href="#abstraction">RL 抽象</a></li>
|
||
<li><a href="#architecture">架构全图</a></li>
|
||
</ul>
|
||
<div class="group-title">二、核心模块</div>
|
||
<ul>
|
||
<li><a href="#environment">Environment 层</a></li>
|
||
<li><a href="#container">Container 层</a></li>
|
||
<li><a href="#agent">Code Agent 层</a></li>
|
||
<li><a href="#queue-mediated">Queue-Mediated ⭐</a></li>
|
||
<li><a href="#llm-client">LLM Client 层</a></li>
|
||
<li><a href="#ares-proxy">ares-proxy (Go)</a></li>
|
||
<li><a href="#registry">Registry + Presets</a></li>
|
||
</ul>
|
||
<div class="group-title">三、周边</div>
|
||
<ul>
|
||
<li><a href="#examples">Examples 渐进式</a></li>
|
||
<li><a href="#testing">测试 + Mock</a></li>
|
||
<li><a href="#tracking">StatTracker</a></li>
|
||
<li><a href="#mech-interp">mech_interp</a></li>
|
||
</ul>
|
||
<div class="group-title">四、提炼</div>
|
||
<ul>
|
||
<li><a href="#patterns">设计模式</a></li>
|
||
<li><a href="#gold">亮点·坑点·可抄</a></li>
|
||
<li><a href="#comparison">对标启发</a></li>
|
||
<li><a href="#reading-path">阅读路径</a></li>
|
||
</ul>
|
||
</nav>
|
||
|
||
<!-- Sidebar footer: analysed commit, line counts, upstream repository link.
     The new-tab link carries rel="noopener noreferrer" so the opened page
     gets no handle on this window. -->
<div class="side-meta">
  <p>📦 commit <code>c804aa2</code></p>
  <p>📏 Py 8,339 · Test 3,561 · Go 5 files</p>
  <p>🔗 <a href="https://github.com/withmartian/ares" target="_blank" rel="noopener noreferrer">GitHub →</a></p>
</div>
|
||
</aside>
|
||
|
||
<main>
|
||
|
||
<div class="hero">
|
||
<div class="hero-badge"><span class="dot"></span> RL-first · Agentic Research Suite</div>
|
||
<h1>ARES 源码解析</h1>
|
||
<p class="hero-tagline">
|
||
<strong>withmartian/ares</strong> ——
|
||
把 LLM Agent 当成 RL 问题的"考场+监考系统"。
|
||
用 <strong>asyncio.Queue 拦截</strong> Agent 的 LLM 调用,让线性 Agent 代码无感地被 RL 环境托管。
|
||
Python 8.3K 行 + Go HTTP 代理,双栈容器(Daytona / Docker),双栈 Agent(mini-swe-agent / terminus2)。
|
||
</p>
|
||
|
||
<div class="stats">
|
||
<div class="stat"><div class="stat-value blue">8,339</div><div class="stat-label">Python LOC(非测试)</div></div>
|
||
<div class="stat"><div class="stat-value violet">3,561</div><div class="stat-label">测试 LOC</div></div>
|
||
<div class="stat"><div class="stat-value cyan">72</div><div class="stat-label">Python 文件</div></div>
|
||
<div class="stat"><div class="stat-value green">5</div><div class="stat-label">Go 文件(ares-proxy)</div></div>
|
||
<div class="stat"><div class="stat-value yellow">250→1M</div><div class="stat-label">步数上限范围</div></div>
|
||
<div class="stat"><div class="stat-value blue">~50</div><div class="stat-label">核心模式代码行数</div></div>
|
||
</div>
|
||
|
||
<!-- Hero metadata row: version, upstream link, licence, Python requirement.
     The new-tab link carries rel="noopener noreferrer" so the opened page
     gets no handle on this window. -->
<div class="hero-meta">
  <span><strong>版本</strong>:commit c804aa2</span>
  <span><strong>上游</strong>:<a href="https://github.com/withmartian/ares" target="_blank" rel="noopener noreferrer">withmartian/ares</a></span>
  <span><strong>License</strong>:MIT</span>
  <span><strong>Python</strong>:≥ 3.12</span>
</div>
|
||
</div>
|
||
|
||
<section id="tldr">
|
||
<h2><span class="sec-num">§1</span>TL;DR</h2>
|
||
<p class="sec-lead">一页纸看懂 ARES 是什么 / 不是什么 / 值得抄什么。</p>
|
||
|
||
<div class="highlight-box">
|
||
<h4>🎯 一句话</h4>
|
||
<p>
|
||
ARES 不是 Agent 产品,也不是训练算法库——它是 <strong>Agent RL 的基础设施层</strong>:
|
||
把每个 LLM 请求变成 observation,把 LLM 响应变成 action,让训练框架(trl / verl / openpipe)能按 RL 循环驱动 Agent。
|
||
</p>
|
||
</div>
|
||
|
||
<div class="card-grid">
|
||
<div class="card">
|
||
<h4>核心抽象</h4>
|
||
<p><span class="tag tag-blue">dm_env</span><span class="tag tag-violet">async</span></p>
|
||
<p>LLMRequest = observation,LLMResponse = action,reward 从容器 <code>/reward.txt</code> 或 <code>/reward.json</code> 读出。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h4>最关键模式</h4>
|
||
<p><span class="tag tag-blue">50 行</span><span class="tag tag-green">全框架杠杆点</span></p>
|
||
<p><code>QueueMediatedLLMClient</code> 用 <code>asyncio.Queue + Future</code> 拦截 Agent 调用。Agent 线性代码无感被 RL 环境托管。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h4>双栈容器</h4>
|
||
<p><span class="tag tag-blue">云端默认</span><span class="tag tag-violet">本地备用</span></p>
|
||
<p>Daytona(10 次 retry + auto-stop)+ Docker(本地 build + tar 传输)。Janitor 用 atexit 兜底清理,防云资源泄露。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h4>双栈 Agent</h4>
|
||
<p><span class="tag tag-blue">轻量</span><span class="tag tag-violet">生产级</span></p>
|
||
<p>MiniSWECodeAgent(~260 行,SWE-bench 跑分)vs Terminus2Agent(1,110 行,tmux 持续会话 + 主动概括)。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h4>双栈拦截</h4>
|
||
<p><span class="tag tag-blue">同进程</span><span class="tag tag-violet">跨进程</span></p>
|
||
<p>in-process Python Queue(0 延迟)+ ares-proxy Go HTTP(跨容器/跨机器,RTT 10-100ms)。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h4>附加彩蛋</h4>
|
||
<p><span class="tag tag-yellow">mech_interp</span><span class="tag tag-green">TUI</span></p>
|
||
<p>transformer-lens 集成、激活抓取、线性探针、CAA 干预;Textual TUI 评估看板。</p>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
|
||
<section id="positioning">
|
||
<h2><span class="sec-num">§2</span>生态位 · 它不是什么</h2>
|
||
<p class="sec-lead">理解 ARES 的第一步是搞清楚它和相邻层的边界。</p>
|
||
|
||
<!-- Misconception vs. reality table. The header row sits in <thead> with
     scope="col" so assistive technology associates each cell with its column. -->
<table>
  <thead>
    <tr><th scope="col" style="width:35%">常见误解</th><th scope="col">实际</th></tr>
  </thead>
  <tbody>
    <tr><td>成品 Agent 产品(类 Manus)</td><td>❌ 不是终端产品,不直接服务用户</td></tr>
    <tr><td>训练算法库(类 trl / verl)</td><td>❌ 不做 PPO / GRPO 权重更新</td></tr>
    <tr><td>LLM 路由器(withmartian 主业)</td><td>❌ 那是另一条产品线</td></tr>
  </tbody>
</table>
|
||
|
||
<h3>它在哪一层</h3>
|
||
<div class="ascii"><span class="hl-dim">应用层</span> <span class="hl-blue">Manus · HiClaw · OpenClaw · 手机 GUI Agent</span>
|
||
<span class="hl-dim">↑ 用 LLM 干活</span>
|
||
<span class="hl-dim">编排层</span> <span class="hl-violet">LangGraph · CrewAI · AutoGen</span>
|
||
<span class="hl-dim">↑ 多 Agent 协作</span>
|
||
<span class="hl-dim">基础设施</span> <span class="hl-green">★ ARES ★</span> <span class="hl-dim">← 你在这里</span>
|
||
<span class="hl-dim">↑ RL 环境 / 沙箱 / 观察-动作适配</span>
|
||
<span class="hl-dim">训练器</span> <span class="hl-yellow">trl · verl · OpenPipe · unsloth</span>
|
||
<span class="hl-dim">↑ PPO / GRPO / DPO 权重更新</span>
|
||
<span class="hl-dim">模型</span> <span class="hl-cyan">Qwen · Llama · GLM · Mistral</span></div>
|
||
|
||
<h3>谁会用它</h3>
|
||
<ul>
|
||
<li>做 Agent 后训练(post-training / fine-tuning)的研究员</li>
|
||
<li>SWE-bench Verified 类基准的大规模并行评估者</li>
|
||
<li>想把自家 Agent 接入"统一评估 + 统一沙箱"的工程团队</li>
|
||
<li>withmartian 自家的 Router / Agent 产品线</li>
|
||
</ul>
|
||
</section>
|
||
|
||
<section id="abstraction">
|
||
<h2><span class="sec-num">§3</span>核心抽象:Agent 当成 RL</h2>
|
||
<p class="sec-lead">ARES 实现了 <strong>dm_env 的 async 版本</strong>。一次 episode 就是 Agent 从接到任务到 reward 出现的全过程。</p>
|
||
|
||
<div class="ascii">┌─────────────────────────────────────────────────────┐
|
||
│ RL Loop │
|
||
│ │
|
||
│ Env.reset() ─────────→ <span class="hl-green">TimeStep(FIRST)</span> │
|
||
│ │ ↓ │
|
||
│ │ observation = <span class="hl-blue">LLMRequest</span> │
|
||
│ │ ↓ │
|
||
│ ↓ Agent LLM │
|
||
│ Env.step(action) ←───────── action = <span class="hl-violet">LLMResponse</span>│
|
||
│ │ ↑ │
|
||
│ ↓ agent 继续 │
|
||
│ <span class="hl-yellow">TimeStep(MID, reward=0)</span> ─→ 下一个 LLMRequest │
|
||
│ │ │
|
||
│ ↓ │
|
||
│ <span class="hl-green">TimeStep(LAST, reward)</span> ← 终止(250步/完成/错误) │
|
||
└─────────────────────────────────────────────────────┘</div>
|
||
|
||
<h3>Environment 协议</h3>
|
||
<!-- Python snippet of the Environment protocol. Method lines are indented
     under the class header, and ">" is escaped as an entity inside <pre>. -->
<pre><code><span class="kw">class</span> <span class="cls">Environment</span>(Protocol[ActionType, ObservationType, RewardType, DiscountType]):
    <span class="kw">async def</span> <span class="fn">reset</span>(<span class="var">self</span>) -&gt; <span class="cls">TimeStep</span>[ObservationType, RewardType, DiscountType]: ...
    <span class="kw">async def</span> <span class="fn">step</span>(<span class="var">self</span>, action: ActionType) -&gt; <span class="cls">TimeStep</span>[...]: ...
    <span class="kw">async def</span> <span class="fn">close</span>(<span class="var">self</span>) -&gt; <span class="kw">None</span>: ...</code></pre>
|
||
<p class="sec-lead">来源:<span class="file">src/ares/environments/base.py:71-138</span></p>
|
||
|
||
<h3>TimeStep 的三种状态</h3>
|
||
<!-- TimeStep states table. The header row sits in <thead> with scope="col"
     so assistive technology associates each cell with its column. -->
<table>
  <thead>
    <tr><th scope="col">step_type</th><th scope="col">语义</th><th scope="col">reward</th><th scope="col">observation</th></tr>
  </thead>
  <tbody>
    <tr><td><code>FIRST</code></td><td>episode 开始</td><td><code>None</code>(强制)</td><td>首个 LLMRequest</td></tr>
    <tr><td><code>MID</code></td><td>episode 进行中</td><td><code>0.0</code>(稀疏奖励)</td><td>下一个 LLMRequest</td></tr>
    <tr><td><code>LAST</code></td><td>episode 结束</td><td>从 <code>/reward.*</code> 读出</td><td><code>None</code></td></tr>
  </tbody>
</table>
|
||
<p class="sec-lead">来源:<span class="file">src/ares/environments/base.py:31-69</span></p>
|
||
</section>
|
||
|
||
<section id="architecture">
|
||
<h2><span class="sec-num">§4</span>架构全图</h2>
|
||
<p class="sec-lead">从公开 API 到 Go 代理的五层栈。</p>
|
||
|
||
<div class="ascii">┌──────────────────────────────────────────────────────────────┐
|
||
│ <span class="hl-blue">Public API (__init__.py)</span> │
|
||
│ ares.make() · ares.info() · @register_env · TimeStep │
|
||
└──────────────────────────────────────────────────────────────┘
|
||
│
|
||
↓
|
||
┌──────────────────────────────────────────────────────────────┐
|
||
│ <span class="hl-violet">Registry (registry.py) + Presets (presets.py)</span> │
|
||
│ · HarborSpec × {mini_swe_agent, terminus2_agent} │
|
||
│ · TwentyQuestionsSpec │
|
||
│ · Selector 语法: <span class="hl-cyan">sbv-mswea:0:10, sbv-mswea@2/8</span> │
|
||
└──────────────────────────────────────────────────────────────┘
|
||
│
|
||
↓
|
||
┌────────────────────────┐ ┌─────────────────────────────┐
|
||
│ <span class="hl-blue">CodeEnvironment</span> │────→│ <span class="hl-violet">Container</span> │
|
||
│ (code_env.py) │ │ ├── DaytonaContainer (云) │
|
||
│ · reset / step 主循环 │ │ └── DockerContainer (本地) │
|
||
│ · 250 步上限 │ │ <span class="hl-yellow">Janitor (atexit 兜底)</span> │
|
||
│ · reward 读取 │ └─────────────────────────────┘
|
||
└────────────────────────┘
|
||
│
|
||
↓ 启动 agent 作为独立 asyncio Task
|
||
┌────────────────────────┐ ┌─────────────────────────────┐
|
||
│ <span class="hl-blue">CodeAgent</span> (protocol) │────→│ <span class="hl-violet">LLMClient</span> (protocol) │
|
||
│ ├── MiniSWECodeAgent │ │ ├── <span class="hl-green">QueueMediatedLLMClient</span> │
|
||
│ └── Terminus2Agent │ │ │ ← <span class="hl-green">拦截到环境</span> │
|
||
│ (tmux + 概括) │ │ ├── ChatCompletionClient │
|
||
└────────────────────────┘ │ ├── LlamaCppClient (本地) │
|
||
│ │ └── HookedTransformerClient │
|
||
↓ └─────────────────────────────┘
|
||
┌──────────────────────────────┐
|
||
│ <span class="hl-yellow">ares-proxy (Go)</span> │
|
||
│ HTTP 版 queue-mediated │
|
||
│ 用于跨进程/跨容器拦截 │
|
||
└──────────────────────────────┘</div>
|
||
</section>
|
||
|
||
<section id="environment">
|
||
<h2><span class="sec-num">§5</span>Environment 层</h2>
|
||
<p class="sec-lead">RL 主循环的编排者。管容器生命周期、驱动 Agent Task、收集 reward。</p>
|
||
|
||
<h3>CodeEnvironment 的签名</h3>
|
||
<!-- Python snippet of the CodeEnvironment class header. The four type
     arguments are indented inside the subscript brackets. -->
<pre><code><span class="kw">class</span> <span class="cls">CodeEnvironment</span>(base.Environment[
    response.<span class="cls">LLMResponse</span>, <span class="com"># ActionType</span>
    request.<span class="cls">LLMRequest</span> | <span class="kw">None</span>, <span class="com"># ObservationType</span>
    <span class="cls">float</span>, <span class="com"># RewardType</span>
    <span class="cls">float</span>, <span class="com"># DiscountType</span>
])</code></pre>
|
||
|
||
<h3>reset() 流程 <span class="file">code_env.py:95-127</span></h3>
|
||
<ol>
|
||
<li>清空步数,停旧容器(<span class="file">:104-108</span>)</li>
|
||
<li>随机选任务 <code>_reset_task()</code>(<span class="file">:110</span>)</li>
|
||
<li>启动容器 <code>_start_container()</code>(<span class="file">:111</span>)</li>
|
||
<li>启动 agent 作为独立 asyncio Task(<span class="file">:112</span>)——Agent 代码线性,环境在后台跑它</li>
|
||
<li>等 agent 发出第一个 LLM 请求 <code>_get_time_step()</code>(<span class="file">:114</span>)</li>
|
||
<li>包装成 <code>FIRST</code> TimeStep 返回</li>
|
||
</ol>
|
||
|
||
<h3>step(action) 流程 <span class="file">code_env.py:129-161</span></h3>
|
||
<ol>
|
||
<li>步数 +1(<span class="file">:138</span>)</li>
|
||
<li>把 <code>action</code> 喂回 agent:<code>_llm_req_future.set_result(action)</code>(<span class="file">:142</span>)—— <strong>唤醒 agent 的 await</strong></li>
|
||
<li>等 agent 下一个 LLM 请求或 agent 任务完成(<span class="file">:146</span>)</li>
|
||
<li>步数超限(默认 250)强制 <code>LAST</code>,取消 agent task(<span class="file">:148-153</span>)</li>
|
||
<li>否则返回 <code>MID(reward=0.0)</code>,episode 终止才算分</li>
|
||
</ol>
|
||
|
||
<h3>Reward 双格式读取 <span class="file">code_env.py:302-315</span></h3>
|
||
<div class="compare">
|
||
<div class="compare-col a">
|
||
<h5>/reward.txt</h5>
|
||
<p>直接 <code>float(content)</code>。最简单场景用。</p>
|
||
</div>
|
||
<div class="compare-col b">
|
||
<h5>/reward.json</h5>
|
||
<p>解析 JSON,取<strong>唯一 key</strong> 的 value。Harbor 数据集约定。</p>
|
||
</div>
|
||
</div>
|
||
|
||
<h3>Episode 终止三条路</h3>
|
||
<!-- Episode termination paths table. The header row sits in <thead> with
     scope="col" so assistive technology associates each cell with its column. -->
<table>
  <thead>
    <tr><th scope="col">触发</th><th scope="col">位置</th><th scope="col">终局</th></tr>
  </thead>
  <tbody>
    <tr><td>Agent Task 完成</td><td><span class="file">:174-193</span></td><td>LAST(reward 从 /reward.* 读)</td></tr>
    <tr><td>步数超限</td><td><span class="file">:148-153</span></td><td>LAST(reward=上一 reward)</td></tr>
    <tr><td>已 LAST 再 step</td><td><span class="file">:135-136</span></td><td>抛异常,要求 reset</td></tr>
  </tbody>
</table>
|
||
</section>
|
||
|
||
<section id="container">
|
||
<h2><span class="sec-num">§6</span>Container 层</h2>
|
||
<p class="sec-lead">隔离 Agent 执行环境。双实现覆盖云 / 本地,Janitor 兜底防泄露。</p>
|
||
|
||
<h3>Container Protocol <span class="file">containers/containers.py:24-131</span></h3>
|
||
<!-- Python-style listing of the Container protocol methods. Lines are kept
     contiguous inside <pre>, and ">" is escaped as an entity. -->
<pre><code><span class="kw">async def</span> <span class="fn">start</span>(env: <span class="cls">dict</span>[<span class="cls">str</span>, <span class="cls">str</span>] | <span class="kw">None</span>) -&gt; <span class="kw">None</span>
<span class="kw">async def</span> <span class="fn">exec_run</span>(command, workdir, env, timeout_s) -&gt; <span class="cls">ExecResult</span>
<span class="kw">async def</span> <span class="fn">upload_files</span>/download_files/upload_dir/download_dir
<span class="kw">def</span> <span class="fn">stop_and_remove</span>() -&gt; <span class="kw">None</span> <span class="com"># 唯一同步方法,给 atexit 用</span></code></pre>
|
||
|
||
<h3>双实现对比</h3>
|
||
<!-- Daytona vs. Docker comparison table. The header row sits in <thead> with
     scope="col" so assistive technology associates each cell with its column. -->
<table>
  <thead>
    <tr><th scope="col">特性</th><th scope="col">Daytona(云,默认)</th><th scope="col">Docker(本地)</th></tr>
  </thead>
  <tbody>
    <tr><td>启动介质</td><td>云 API</td><td>docker-py,本地 build</td></tr>
    <tr><td>重试</td><td><strong>10 次指数退避</strong><br><span class="file">daytona.py:35-46</span></td><td>无</td></tr>
    <tr><td>超时处理</td><td>抛 TimeoutError,不重试</td><td><code>asyncio.wait_for</code></td></tr>
    <tr><td>文件传输</td><td>原生 SDK <code>sbx.fs.upload_files()</code></td><td>tar 打包 <code>put_archive()</code></td></tr>
    <tr><td>资源配置</td><td>CPU / Memory / Disk / GPU</td><td>❌ TODO 未支持</td></tr>
    <tr><td>清理</td><td>auto_stop(30min) + auto_delete(0)</td><td>force remove</td></tr>
    <tr><td>挂起方式</td><td>Sandbox 自管</td><td><code>tail -f /dev/null</code><br><span class="file">docker.py:83</span></td></tr>
  </tbody>
</table>
|
||
|
||
<div class="callout">
|
||
<strong>Docker 坑点</strong>:不写 <code>tail -f /dev/null</code>,容器 CMD 执行完就会退出。这是常见的"容器秒退"问题的标准解法。
|
||
</div>
|
||
|
||
<h3>Janitor atexit 兜底 <span class="file">code_env.py:348-389</span></h3>
|
||
<div class="ascii"><span class="hl-dim">正常流程:</span>
|
||
async with env: ─→ <span class="hl-green">__aenter__</span>:注册到 _ENVIRONMENT_JANITOR
|
||
─→ ... 使用 ...
|
||
─→ <span class="hl-green">__aexit__</span>:unregister,正常清理
|
||
|
||
<span class="hl-dim">异常流程(进程被 kill、Ctrl-C):</span>
|
||
__init__ 时已 atexit.register(_sync_cleanup)
|
||
─→ <span class="hl-yellow">atexit 触发</span>:遍历所有注册环境
|
||
─→ 每个调 <span class="hl-red">container.stop_and_remove()</span>(<strong>同步</strong>)
|
||
─→ 云资源被删除,不泄露</div>
|
||
|
||
<p><strong>关键设计约束</strong>:atexit 不能跑 async,所以 <code>Container</code> 协议强制提供同步的 <code>stop_and_remove()</code>。</p>
|
||
</section>
|
||
|
||
<section id="agent">
|
||
<h2><span class="sec-num">§7</span>Code Agent 层 · 双栈 Agent 对比</h2>
|
||
<p class="sec-lead">ARES 内置两种 Agent:轻量跑分的 <strong>MiniSWECodeAgent</strong> 和生产级复杂度的 <strong>Terminus2Agent</strong>。</p>
|
||
|
||
<h3>协议</h3>
|
||
<pre><code><span class="kw">class</span> <span class="cls">CodeAgent</span>(Protocol):
|
||
<span class="kw">async def</span> <span class="fn">run</span>(<span class="var">self</span>, task: <span class="cls">str</span>) -> <span class="kw">None</span></code></pre>
|
||
|
||
<div class="compare">
|
||
<div class="compare-col a">
|
||
<h5>MiniSWECodeAgent</h5>
|
||
<p class="sec-lead" style="margin: 0.3rem 0 0.8rem;">轻量级,封装 mini-swe-agent 库</p>
|
||
<ul>
|
||
<li><strong>步数上限</strong>:250</li>
|
||
<li><strong>会话</strong>:每步无状态</li>
|
||
<li><strong>循环</strong>:step → query → execute_action</li>
|
||
<li><strong>bash 解析</strong>:markdown 代码块</li>
|
||
<li><strong>错误</strong>:异常分层(<code>_NonTerminatingError</code> / <code>_TerminatingError</code>)</li>
|
||
<li><strong>代码量</strong>:约 260 行</li>
|
||
<li><strong>适用</strong>:SWE-bench 快速跑分</li>
|
||
</ul>
|
||
<p class="sec-lead" style="margin-top: 0.6rem;"><span class="file">mini_swe_agent.py:156-258</span></p>
|
||
</div>
|
||
<div class="compare-col b">
|
||
<h5>Terminus2Agent</h5>
|
||
<p class="sec-lead" style="margin: 0.3rem 0 0.8rem;">生产级,Terminal-Bench 的 tmux 会话</p>
|
||
<ul>
|
||
<li><strong>步数上限</strong>:1,000,000</li>
|
||
<li><strong>会话</strong>:tmux 持续会话,160×40 分辨率,50k 历史</li>
|
||
<li><strong>循环</strong>:tmux check → query → parse → execute → 两步完成确认</li>
|
||
<li><strong>上下文</strong>:主动概括(200k token)+ 被动救援(context_length_exceeded)</li>
|
||
<li><strong>输出追踪</strong>:增量定位(rfind 锚点)</li>
|
||
<li><strong>Parser</strong>:JSON / XML 可切换,三级降级</li>
|
||
<li><strong>代码量</strong>:1,110 行</li>
|
||
<li><strong>适用</strong>:长期交互、超长轨迹</li>
|
||
</ul>
|
||
<p class="sec-lead" style="margin-top: 0.6rem;"><span class="file">terminus2/terminus2_agent.py:482-849</span></p>
|
||
</div>
|
||
</div>
|
||
|
||
<h3>Parser 三级降级 <span class="file">terminus2/json_parser.py:75-99</span></h3>
|
||
<div class="highlight-box">
|
||
<h4>容错 Parser 模板(可直接抄)</h4>
|
||
<pre style="margin: 0.8rem 0 0;"><code><span class="kw">try</span>:
|
||
data = json.loads(json_str)
|
||
<span class="kw">except</span> <span class="cls">JSONDecodeError</span>:
|
||
json_str = <span class="var">self</span>._auto_fix_json(json_str) <span class="com"># Level 2: 补括号 / 引号</span>
|
||
<span class="kw">try</span>:
|
||
data = json.loads(json_str)
|
||
<span class="kw">except</span> <span class="cls">JSONDecodeError</span>:
|
||
fallback = <span class="var">self</span>._parse_with_regex(original) <span class="com"># Level 3: regex 降级</span></code></pre>
|
||
<p style="margin-top: 0.8rem; margin-bottom: 0;">XML 侧还有 <code>salvage_truncated_response</code>,从被截断的响应中抢救合法标签。</p>
|
||
</div>
|
||
|
||
<div class="callout">
|
||
<strong>坑点</strong>:200k 阈值和 <code>2 字符 = 1 token</code> 估算都是硬编码。对英文 / Unicode 混杂的 tmux 输出不完全准确。生产需按模型实际 tokenizer 调。
|
||
</div>
|
||
</section>
|
||
|
||
<section id="queue-mediated">
|
||
<h2><span class="sec-num">§8</span>Queue-Mediated ⭐ 全框架杠杆点</h2>
|
||
<p class="sec-lead">
|
||
整个 ARES 最精妙的 50 行代码。它让 <strong>线性 Agent 代码</strong>(随手写一堆 <code>await llm(...)</code>)和 <strong>RL 环境协议</strong>(强制 <code>reset/step/close</code> 循环)无感接合。
|
||
</p>
|
||
|
||
<h3>核心 50 行 <span class="file">queue_mediated_client.py:47-50</span></h3>
|
||
<pre><code><span class="kw">class</span> <span class="cls">QueueMediatedLLMClient</span>:
|
||
q: asyncio.<span class="cls">Queue</span>[<span class="cls">ValueAndFuture</span>[<span class="cls">LLMRequest</span>, <span class="cls">LLMResponse</span>]]
|
||
|
||
<span class="kw">async def</span> <span class="fn">__call__</span>(<span class="var">self</span>, req: <span class="cls">LLMRequest</span>) -> <span class="cls">LLMResponse</span>:
|
||
future = asyncio.<span class="fn">Future</span>[<span class="cls">LLMResponse</span>]()
|
||
<span class="kw">await</span> <span class="var">self</span>.q.<span class="fn">put</span>(<span class="cls">ValueAndFuture</span>(value=req, future=future))
|
||
<span class="kw">return await</span> future <span class="com"># ← Agent 挂在这里</span></code></pre>
|
||
|
||
<h3>它是怎么工作的</h3>
|
||
<div class="ascii"> <span class="hl-blue">Agent 侧(线性代码)</span> <span class="hl-violet">环境侧(RL 循环)</span>
|
||
───────────────── ─────────────────
|
||
|
||
<span class="hl-dim">agent.run():</span>
|
||
response = <span class="hl-green">await</span> llm_client(req) <span class="hl-dim">(1)</span>
|
||
│ ┌─── env.step(action):
|
||
↓ │
|
||
<span class="hl-yellow">put((req, future)) 入 Queue</span> <span class="hl-dim">(2)</span> │
|
||
│ │
|
||
↓ │
|
||
<span class="hl-red">await future # 挂住!</span> <span class="hl-dim">(3)</span> │
|
||
│
|
||
<span class="hl-dim">(4)</span> ←─── env.q.get() 拿 (req, future)
|
||
│
|
||
↓
|
||
<span class="hl-dim">(5)</span> return TimeStep(obs=req) ← 训练器收到
|
||
│
|
||
↓
|
||
<span class="hl-dim">(6)</span> 训练器算出 action(下一个 LLMResponse)
|
||
│
|
||
↓
|
||
<span class="hl-dim">(7)</span> env.step(action):
|
||
future.<span class="hl-green">set_result(action)</span>
|
||
<span class="hl-green">↓</span> │
|
||
<span class="hl-green">future 返回!</span> <span class="hl-dim">(8)</span> │
|
||
response 被赋值 │
|
||
继续 Agent 下一行代码 │</div>
|
||
|
||
<p><strong>结果</strong>:</p>
|
||
<ul>
|
||
<li><strong>Agent 作者</strong>:正常写 <code>response = await llm(req)</code>,完全不知道自己被托管</li>
|
||
<li><strong>训练者</strong>:拿到符合 dm_env 规范的 <code>reset/step</code>,可以喂给任何训练框架</li>
|
||
<li><strong>零妥协</strong>:不需要 Agent 实现"RL-aware 接口",也不需要训练器懂 Agent 内部</li>
|
||
</ul>
|
||
|
||
<h3>支撑抽象:ValueAndFuture <span class="file">async_utils.py</span></h3>
|
||
<pre><code><span class="kw">@dataclasses.dataclass(frozen=<span class="kw">True</span>)</span>
|
||
<span class="kw">class</span> <span class="cls">ValueAndFuture</span>[ValType, FutureType]:
|
||
value: ValType
|
||
future: asyncio.<span class="cls">Future</span>[FutureType]</code></pre>
|
||
<p>8 行泛型 dataclass。任何"把值和响应承诺打包传递"的场景都能抄:模拟器、游戏引擎、多租户推理队列、RPC 中间件。</p>
|
||
|
||
<div class="highlight-box">
|
||
<h4>💡 值得抄</h4>
|
||
<p>这是那种"看一眼就该记住"的模式。比 RxJS 的 Subject 简单得多,但在 async Python 场景里解决了"外部事件驱动 + 内部线性代码"的终极矛盾。</p>
|
||
</div>
|
||
</section>
|
||
|
||
<section id="llm-client">
|
||
<h2><span class="sec-num">§9</span>LLM Client 层</h2>
|
||
<p class="sec-lead">多个实现共享 <code>LLMClient</code> 协议。拦截版、API 版、本地版、可解释性版各司其职。</p>
|
||
|
||
<h3>协议</h3>
|
||
<pre><code><span class="kw">class</span> <span class="cls">LLMClient</span>(Protocol):
|
||
<span class="kw">async def</span> <span class="fn">__call__</span>(<span class="var">self</span>, request: <span class="cls">LLMRequest</span>) -> <span class="cls">LLMResponse</span></code></pre>
|
||
|
||
<div class="card-grid">
|
||
<div class="card">
|
||
<h4>QueueMediatedLLMClient ⭐</h4>
|
||
<p><span class="tag tag-green">核心</span></p>
|
||
<p>RL 拦截版,把请求塞进 queue 等 future。见 §8。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h4>ChatCompletionCompatibleLLMClient</h4>
|
||
<p><span class="tag tag-blue">API 默认</span></p>
|
||
<p>OpenAI 兼容 HTTP 客户端。Martian API 默认后端。线程局部 httpx + tenacity 最多 3 次尝试(含首次调用)+ 成本追踪。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h4>LlamaCppLLMClient</h4>
|
||
<p><span class="tag tag-violet">本地 GGUF</span></p>
|
||
<p>对接本地 GGUF 模型。<code>asyncio.to_thread()</code> 包装阻塞推理。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h4>HookedTransformerLLMClient</h4>
|
||
<p><span class="tag tag-yellow">mech_interp</span></p>
|
||
<p>底层 transformer-lens,支持抓中间激活 / 钩子干预。</p>
|
||
</div>
|
||
</div>
|
||
|
||
<h3>ChatCompletionCompatibleLLMClient 的四个亮点</h3>
|
||
|
||
<h4>1. 线程局部 httpx 客户端 <span class="file">chat_completions_compatible.py:22-41</span></h4>
|
||
<pre><code>_thread_local = threading.<span class="fn">local</span>()
|
||
|
||
<span class="kw">def</span> <span class="fn">_get_llm_client</span>(base_url, api_key):
|
||
key = (base_url, api_key)
|
||
clients = <span class="fn">getattr</span>(_thread_local, <span class="str">"clients"</span>, {})
|
||
<span class="kw">if</span> key <span class="kw">not in</span> clients:
|
||
clients[key] = openai.<span class="cls">AsyncClient</span>(...)
|
||
_thread_local.clients = clients
|
||
<span class="kw">return</span> clients[key]</code></pre>
|
||
<p><strong>为什么要这样写</strong>:<code>httpx.AsyncClient</code> 绑创建线程的 event loop。跨线程用同一个实例会死锁。线程局部是最优雅的解。</p>
|
||
|
||
<h4>2. Tenacity 装饰器 <span class="file">:44-53</span></h4>
|
||
<pre><code><span class="kw">@tenacity.retry</span>(
|
||
stop=tenacity.<span class="fn">stop_after_attempt</span>(<span class="num">3</span>),
|
||
wait=tenacity.<span class="fn">wait_exponential</span>(min=<span class="num">1</span>, max=<span class="num">60</span>) + tenacity.<span class="fn">wait_random</span>(min=<span class="num">0</span>, max=<span class="num">1</span>),
|
||
before_sleep=tenacity.<span class="fn">before_sleep_log</span>(_LOGGER, logging.INFO),
|
||
)</code></pre>
|
||
<p>3 次尝试 + 指数退避 + 随机抖动。异步 API 客户端通用模板。</p>
|
||
|
||
<h4>3. GPT-5 特判 <span class="file">:66-67</span></h4>
|
||
<p>GPT-5 不支持 <code>temperature</code>,动态移除参数。提醒你:模型家族碎片化在 API 适配层是常态。</p>
|
||
|
||
<h4>4. 成本内置 <span class="file">:72</span> + <span class="file">accounting.py:70-97</span></h4>
|
||
<p>每个 <code>LLMResponse</code> 带 <code>cost</code> 字段。价目表从 Martian API 拉取(LRU 缓存),按 prompt/completion token 累加。</p>
|
||
<div class="callout">
|
||
<strong>注意</strong>:<strong>不计入</strong> <code>cached_tokens</code> 和 <code>reasoning_tokens</code>(Martian 当前未区分)。对 OpenAI o3 / Claude 3.7 thinking 等推理模型场景要另行处理。
|
||
</div>
|
||
|
||
<h3>转换层:两份而非一份</h3>
|
||
<p>OpenAI 的 <strong>Chat Completions API</strong> 和 <strong>Responses API</strong> 的消息/工具结构完全不同,单一转换器写起来会很乱,所以 ARES 拆了两份各司其职。</p>
|
||
|
||
<table>
|
||
<tr><th></th><th>openai_chat_converter.py (395 行)</th><th>openai_responses_converter.py (435 行)</th></tr>
|
||
<tr><td>目标 API</td><td>Chat Completions</td><td>Responses</td></tr>
|
||
<tr><td>system prompt</td><td>messages[0] = system 角色</td><td><code>instructions</code> 参数</td></tr>
|
||
<tr><td>工具调用</td><td>展平成 <code>AssistantMessage.tool_calls</code></td><td>多态 <code>input</code> 数组</td></tr>
|
||
<tr><td>损失检测</td><td><code>top_k</code>、<code>stop_sequences>4</code></td><td><code>stop_sequences</code> 完全不支持</td></tr>
|
||
</table>
|
||
</section>
|
||
|
||
<section id="ares-proxy">
|
||
<h2><span class="sec-num">§10</span>ares-proxy · Go HTTP 版 Queue-Mediated</h2>
|
||
<p class="sec-lead">
|
||
in-process Queue 只在同一 Python 进程内有效。Agent 在隔离容器跑时,需要把队列搬到 HTTP。
|
||
<strong>ares-proxy 就是这个跨进程版本</strong>,用 Go 实现。
|
||
</p>
|
||
|
||
<h3>为什么要 ares-proxy</h3>
|
||
<div class="compare">
|
||
<div class="compare-col a">
|
||
<h5>in-process Queue</h5>
|
||
<p>Agent 和环境同一个 Python 进程。</p>
|
||
<ul>
|
||
<li>0 网络延迟</li>
|
||
<li>asyncio 原生</li>
|
||
<li>单机单进程才能用</li>
|
||
</ul>
|
||
</div>
|
||
<div class="compare-col b">
|
||
<h5>ares-proxy (Go HTTP)</h5>
|
||
<p>Agent 跑在 Docker / Daytona 容器,通过 HTTP 跨进程通信。</p>
|
||
<ul>
|
||
<li>RTT 10-100ms</li>
|
||
<li>goroutine + channel</li>
|
||
<li>跨进程 / 跨容器 / 跨机器都能用</li>
|
||
</ul>
|
||
</div>
|
||
</div>
|
||
|
||
<h3>三端点数据流</h3>
|
||
<div class="ascii"> <span class="hl-blue">容器内 Agent</span> <span class="hl-violet">宿主 Environment</span>
|
||
│ │
|
||
├──<span class="hl-green">POST /v1/chat/completions</span>──→ │
|
||
│ (阻塞等响应) │
|
||
│ │
|
||
│ ←──<span class="hl-yellow">GET /poll</span>──┤
|
||
│ (拿请求) │
|
||
│ │ │
|
||
│ ┌──┘ │
|
||
│ ↓ │
|
||
│ <span class="hl-dim">Python 环境处理</span> │
|
||
│ return LLMResponse │
|
||
│ │ │
|
||
│ <span class="hl-green">──POST /respond──→</span>
|
||
│ │
|
||
│◀──────────────响应回到 Agent────────────────┘</div>
|
||
|
||
<h3>端点实现对照</h3>
|
||
<table>
|
||
<tr><th>端点</th><th>文件:行</th><th>行为</th></tr>
|
||
<tr>
|
||
<td><code>POST /v1/chat/completions</code></td>
|
||
<td><span class="file">main.go:34-59</span><br><span class="file">broker.go:36-73</span></td>
|
||
<td>生成 UUID,创建 <code>responseChan</code>,加入 map + 队列,阻塞等(默认 15min timeout)</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>GET /poll</code></td>
|
||
<td><span class="file">main.go:64-80</span><br><span class="file">broker.go:90-102</span></td>
|
||
<td>原子读整个 <code>requestQueue</code>,<strong>立即清空</strong>(<span class="file">:99</span>),返回 JSON 数组</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>POST /respond</code></td>
|
||
<td><span class="file">main.go:85-109</span><br><span class="file">broker.go:106-122</span></td>
|
||
<td>查 ID,<code>responseChan &lt;- response</code>,关闭通道</td>
|
||
</tr>
|
||
</table>
|
||
|
||
<h3>Broker 数据结构 <span class="file">broker.go:14-22</span></h3>
|
||
<pre><code><span class="kw">type</span> <span class="cls">Broker</span> <span class="kw">struct</span> {
|
||
mutex sync.Mutex
|
||
pendingRequests map[<span class="kw">string</span>]<span class="kw">chan</span> json.RawMessage <span class="com">// ID → 响应通道</span>
|
||
requestQueue []<span class="cls">PendingRequest</span> <span class="com">// 待轮询队列</span>
|
||
}</code></pre>
|
||
|
||
<h3>为什么选 Go</h3>
|
||
<ul>
|
||
<li>goroutine + channel 天然适合队列代理</li>
|
||
<li>纯 stdlib,无外部依赖</li>
|
||
<li>单二进制部署,扔进任何容器都能跑</li>
|
||
<li>Python 做这个反而要装 httpx / aiohttp / uvicorn 一堆</li>
|
||
</ul>
|
||
|
||
<div class="callout">
|
||
<strong>坑点</strong>:<code>responseChan</code> 缓冲大小 = 1(<span class="file">broker.go:41</span>)。如果 Agent 不及时读取响应,会堵塞后续处理。高并发场景建议调大。
|
||
</div>
|
||
</section>
|
||
|
||
<section id="registry">
|
||
<h2><span class="sec-num">§11</span>Registry + Presets + 任务切片</h2>
|
||
<p class="sec-lead">用字符串魔法 <code>"sbv-mswea:0:10"</code> 精准定位 "SWE-bench Verified 上 mini-swe-agent 的前 10 个任务"。</p>
|
||
|
||
<h3>三种 Selector <span class="file">registry.py:31-217</span></h3>
|
||
<table>
|
||
<tr><th>Selector</th><th>构造</th><th>行为</th></tr>
|
||
<tr><td><code>IndexSelector(5)</code></td><td><span class="file">:47-58</span></td><td><code>tasks[5]</code></td></tr>
|
||
<tr><td><code>SliceSelector(0, 10)</code></td><td><span class="file">:62-75</span></td><td><code>tasks[0:10]</code></td></tr>
|
||
<tr><td><code>ShardSelector(2, 8)</code></td><td><span class="file">:79-109</span></td><td>均匀分 8 片取第 2 片</td></tr>
|
||
</table>
|
||
|
||
<h3>语法糖 <span class="file">parse_selector:112-217</span></h3>
|
||
<pre><code><span class="str">"sbv-mswea"</span> <span class="com"># 全选 → SliceSelector(None, None)</span>
|
||
<span class="str">"sbv-mswea:5"</span> <span class="com"># 单任务 → IndexSelector(5)</span>
|
||
<span class="str">"sbv-mswea:0:10"</span> <span class="com"># 切片 → SliceSelector(0, 10)</span>
|
||
<span class="str">"sbv-mswea:5:"</span> <span class="com"># 从 5 到末尾 → SliceSelector(5, None)</span>
|
||
<span class="str">"sbv-mswea@2/8"</span> <span class="com"># 第 2/8 片(分布式评估) → ShardSelector(2, 8)</span></code></pre>
|
||
|
||
<h3>已注册预设 <span class="file">presets.py</span></h3>
|
||
<div class="compare">
|
||
<div class="compare-col a">
|
||
<h5>HarborSpec 系列 <span class="file">:39-82</span></h5>
|
||
<p>从 <code>code_env.list_harbor_datasets()</code> 动态枚举所有 Harbor 数据集,× {mini_swe_agent, terminus2_agent} 笛卡尔积。</p>
|
||
<p>命名:<code>{dataset_id}-{agent_id}</code></p>
|
||
<p>例如:<code>sbv-mswea</code>、<code>sbv-terminus2</code></p>
|
||
</div>
|
||
<div class="compare-col b">
|
||
<h5>TwentyQuestionsSpec <span class="file">:85-119</span></h5>
|
||
<p>20 Questions 猜谜游戏(无容器,纯文本)。</p>
|
||
<p>125 个内置对象。</p>
|
||
<p>展示 ARES <strong>非 SWE-bench</strong> 的能力边界。</p>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
|
||
<section id="examples">
|
||
<h2><span class="sec-num">§12</span>Examples · 渐进式学习梯度</h2>
|
||
<p class="sec-lead">四个示例精心设计,每一步只换一个组件,体现 ARES 的模块化。</p>
|
||
|
||
<div class="card-grid">
|
||
<div class="card">
|
||
<h4>01_sequential_eval_with_local_llm.py</h4>
|
||
<p><span class="tag tag-green">最小</span></p>
|
||
<p>最小循环:<code>async with ares.make("sbv-mswea:0")</code>。用 <code>llama_cpp</code> 加载本地 Qwen2-0.5B。默认 Docker 容器。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h4>02_sequential_eval_with_api.py</h4>
|
||
<p><span class="tag tag-blue">API 切换</span></p>
|
||
<p><strong>唯一差别</strong>:LLM 客户端换成 <code>ChatCompletionCompatibleLLMClient(model="openai/gpt-5-mini")</code>。环境/容器代码一模一样。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h4>03_parallel_eval_with_api.py</h4>
|
||
<p><span class="tag tag-violet">并行核心</span></p>
|
||
<p>Semaphore(20) + gather + TUI 看板。几百任务同时跑。见下方剖析。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h4>20q_case_study/</h4>
|
||
<p><span class="tag tag-yellow">可解释性</span></p>
|
||
<p>5 阶段:采集激活 → 训探针 → 方向识别 → CAA 干预 → 因果验证。展示非 SWE-bench 应用。</p>
|
||
</div>
|
||
</div>
|
||
|
||
<h3>并行机制拆解 <span class="file">examples/03_parallel_eval_with_api.py</span></h3>
|
||
<pre><code><span class="com"># Semaphore 流控</span>
|
||
sem = asyncio.<span class="fn">Semaphore</span>(args.num_parallel_workers) <span class="com"># 默认 20</span>
|
||
|
||
<span class="com"># 包装协程:每个任务先抢到信号量再执行</span>
|
||
<span class="kw">async def</span> <span class="fn">_await_with_semaphore</span>(coro):
|
||
<span class="kw">async with</span> sem:
|
||
<span class="kw">return await</span> coro
|
||
|
||
<span class="com"># gather 批量启动</span>
|
||
tasks = [_await_with_semaphore(run_one(task_id)) <span class="kw">for</span> task_id <span class="kw">in</span> task_ids]
|
||
results = <span class="kw">await</span> asyncio.<span class="fn">gather</span>(*tasks, return_exceptions=<span class="kw">True</span>)</code></pre>
|
||
|
||
<h4>并行瓶颈</h4>
|
||
<ul>
|
||
<li><strong>num_parallel_workers</strong>:Semaphore 上限(默认 20)</li>
|
||
<li><strong>容器工厂配额</strong>:Daytona API 并发创建配额</li>
|
||
<li><strong>单点 CPU / 内存</strong>:TUI Dashboard 渲染 + asyncio 调度</li>
|
||
</ul>
|
||
</section>
|
||
|
||
<section id="testing">
|
||
<h2><span class="sec-num">§13</span>测试 + Mock 体系</h2>
|
||
<p class="sec-lead">单元测试用 mock,集成测试用真容器。</p>
|
||
|
||
<div class="compare">
|
||
<div class="compare-col a">
|
||
<h5>单元测试 · Mock</h5>
|
||
<p><strong>MockContainer</strong> <span class="file">testing/mock_container.py:10-130</span></p>
|
||
<p>记录所有 <code>exec_commands</code> / <code>uploaded_files</code> / <code>downloaded_files</code>。支持 <code>exec_handler</code> 回调动态生成响应。</p>
|
||
<p style="margin-top:0.8rem;"><strong>MockLLMClient</strong> <span class="file">testing/mock_llm.py:10-72</span></p>
|
||
<p>循环预设响应列表 / 自定义 <code>response_handler</code>。记录全部请求,<code>get_last_request()</code> 断言入口。</p>
|
||
</div>
|
||
<div class="compare-col b">
|
||
<h5>集成测试 · 真 Daytona</h5>
|
||
<p><strong>test_default_workdir.py</strong> <span class="file">integration_tests/:10-48</span></p>
|
||
<p>验证 SWE-bench <code>/testbed</code> vs TerminalBench <code>/app</code> 工作目录。</p>
|
||
<p>流程:<code>ares.make(preset) → reset() → exec_run("pwd") → 断言</code></p>
|
||
<p>用 Daytona(避开本地 Docker 兼容问题)。</p>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
|
||
<section id="tracking">
|
||
<h2><span class="sec-num">§14</span>StatTracker · 三实现一协议</h2>
|
||
<p class="sec-lead">时序指标和标量的非侵入式追踪。</p>
|
||
|
||
<h3>协议 <span class="file">stat_tracker.py:16-21</span></h3>
|
||
<pre><code><span class="kw">class</span> <span class="cls">StatTracker</span>(Protocol):
|
||
<span class="kw">@contextlib.contextmanager</span>
|
||
<span class="kw">def</span> <span class="fn">timeit</span>(<span class="var">self</span>, name: <span class="cls">str</span>) -> Generator: ...
|
||
<span class="kw">def</span> <span class="fn">scalar</span>(<span class="var">self</span>, name: <span class="cls">str</span>, value: <span class="cls">float</span>) -> <span class="kw">None</span>: ...</code></pre>
|
||
|
||
<h3>三种实现</h3>
|
||
<table>
|
||
<tr><th>实现</th><th>位置</th><th>机制</th></tr>
|
||
<tr><td><code>NullStatTracker</code></td><td><span class="file">stat_tracker.py:23-30</span></td><td>无操作,生产低开销路径</td></tr>
|
||
<tr><td><code>LoggingStatTracker</code></td><td><span class="file">:33-62</span></td><td>后台任务每 60s 打分位数(p0/p25/p50/p75/p100),<code>np.percentile()</code></td></tr>
|
||
<tr><td><code>TensorboardStatTracker</code></td><td><span class="file">tensorboard.py:14-42</span></td><td>60s 周期 <code>SummaryWriter.add_histogram()</code></td></tr>
|
||
</table>
|
||
|
||
<div class="callout red">
|
||
<strong>约束 / 坑</strong>:
|
||
<br>· 无 MLflow / wandb 集成(仅 tensorboard)
|
||
<br>· 60s 周期硬编码
|
||
<br>· 无接口配置
|
||
<br>· 想接 wandb 只能自己实现 Protocol
|
||
</div>
|
||
</section>
|
||
|
||
<section id="mech-interp">
|
||
<h2><span class="sec-num">§15</span>mech_interp · 机制可解释性附加</h2>
|
||
<p class="sec-lead">ARES 不仅能跑分,还支持可解释性研究的完整闭环。这是 withmartian 的亮点附加。</p>
|
||
|
||
<h3>三个核心组件 <span class="file">contrib/mech_interp/</span></h3>
|
||
<table>
|
||
<tr><th>文件</th><th>行</th><th>作用</th></tr>
|
||
<tr><td><code>hooked_transformer_client.py</code></td><td>13-140</td><td>实现 LLMClient,底层 <code>transformer-lens.HookedTransformer.generate()</code></td></tr>
|
||
<tr><td><code>activation_capture.py</code></td><td>13-89</td><td><code>TrajectoryActivations</code>:列表存每步 <code>ActivationCache</code>,<code>torch.save/load</code> 持久化</td></tr>
|
||
<tr><td><code>hook_utils.py</code></td><td>20-100</td><td><strong>零消融钩子</strong>(zero-ablation,ablate 位置/头)+ <strong>路径补丁钩子</strong>(clean → corrupted 替换做因果分析)</td></tr>
|
||
</table>
|
||
|
||
<h3>和训练什么关系?</h3>
|
||
<p><strong>不是直接训练反馈</strong>,而是 <strong>离线可解释性研究</strong>:</p>
|
||
<div class="ascii"><span class="hl-blue">Phase 1 · 采集</span>
|
||
在线 rollout → <span class="hl-green">HookedTransformer.run_with_cache()</span> → 抓残差流 / 注意力 / MLP 激活 → 存盘
|
||
|
||
<span class="hl-blue">Phase 2 · 离线分析</span>
|
||
训练线性探针 → 识别"无效问题"方向(residual stream 某一层)→ 验证探针可迁移
|
||
|
||
<span class="hl-blue">Phase 3 · 干预</span>
|
||
在线 rollout → <span class="hl-yellow">CAA(Contrastive Activation Addition)</span>在目标步骤 t* 加方向 →
|
||
测量问题有效性改善(因果验证)
|
||
|
||
<span class="hl-blue">Phase 4 · 回写</span>
|
||
把有用的方向 / 钩子作为新能力提供给 Agent</div>
|
||
</section>
|
||
|
||
<section id="patterns">
|
||
<h2><span class="sec-num">§16</span>关键设计模式</h2>
|
||
<p class="sec-lead">从 ARES 源码提炼的七个高杠杆率模式。</p>
|
||
|
||
<div class="card-grid">
|
||
<div class="card">
|
||
<h4>1. Queue-Mediated Communication</h4>
|
||
<p><span class="tag tag-green">⭐ 最重要</span></p>
|
||
<p><code>asyncio.Queue + Future</code> 让线性代码与外部控制器无感接合。50 行代码,但抽象力巨大。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h4>2. Protocol-Oriented Design</h4>
|
||
<p><span class="tag tag-blue">结构子类型</span></p>
|
||
<p>几乎所有核心类型都是 <code>typing.Protocol</code>。无继承树,duck typing + 类型检查两全。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h4>3. Factory Pattern</h4>
|
||
<p><span class="tag tag-violet">依赖注入</span></p>
|
||
<p>环境收"工厂"而非"实例"。<code>container_factory</code> / <code>code_agent_factory</code>。便于 A/B 切换。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h4>4. Async Context Manager</h4>
|
||
<p><span class="tag tag-blue">生命周期</span></p>
|
||
<p>所有资源都 <code>async with</code>。保证 <code>__aexit__</code> 清理。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h4>5. Frozen Dataclass</h4>
|
||
<p><span class="tag tag-blue">并发安全</span></p>
|
||
<p>大部分 dataclass <code>frozen=True</code>。async 并发下避免状态污染。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h4>6. Atexit Janitor</h4>
|
||
<p><span class="tag tag-yellow">兜底清理</span></p>
|
||
<p>异常退出时清理外部资源(容器、临时文件)。<code>atexit.register</code> + 同步版 <code>stop_and_remove</code>。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h4>7. YAGNI</h4>
|
||
<p><span class="tag tag-green">哲学</span></p>
|
||
<p>CLAUDE.md 明说:不做过度抽象。<code>CodeEnvironment</code> 直接实现 <code>Environment</code>,不搞继承塔。</p>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
|
||
<section id="gold">
|
||
<h2><span class="sec-num">§17</span>亮点 · 坑点 · 可抄</h2>
|
||
<p class="sec-lead">最值得拿的三类清单。</p>
|
||
|
||
<h3>✨ 亮点 8 条</h3>
|
||
<div class="card-grid">
|
||
<div class="card">
|
||
<h4>Queue-Mediated 50 行</h4>
|
||
<p>让 Agent 线性代码无感被 RL 托管。全框架最关键杠杆点。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h4>Parser 三级降级</h4>
|
||
<p>JSON → auto-fix → regex。XML 还有 salvage_truncated_response。容错极强。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h4>增量输出追踪</h4>
|
||
<p>rfind 锚点定位新增内容。适配超长 tmux 会话不爆上下文。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h4>双保险概括</h4>
|
||
<p>主动 200k 阈值 + 被动 <code>context_length_exceeded</code> 捕获。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h4>双栈拦截</h4>
|
||
<p>in-process Python Queue + out-of-process Go HTTP。覆盖所有部署形态。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h4>成本内置</h4>
|
||
<p>每个 <code>LLMResponse</code> 带 <code>cost</code>。精细计费无痛。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h4>线程局部 httpx</h4>
|
||
<p>规避 async 事件循环跨线程死锁的最佳实践。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h4>Janitor atexit</h4>
|
||
<p>云资源兜底清理,防泄露。任何管外部资源的系统都该抄。</p>
|
||
</div>
|
||
</div>
|
||
|
||
<h3>⚠️ 坑点 8 条</h3>
|
||
<ol>
|
||
<li><strong>Terminus2Agent tmux 初始化复杂</strong>(<span class="file">:196-319</span>):动态 apt-get 装 tmux,建议生产镜像预装</li>
|
||
<li><strong>200k token 阈值硬编码</strong>(<span class="file">:666</span>):<code>2 字符 = 1 token</code> 估算粗糙,对 Unicode 不准</li>
|
||
<li><strong>ares-proxy 响应通道大小 = 1</strong>(<span class="file">broker.go:41</span>):agent 不及时取会延迟后续处理</li>
|
||
<li><strong>Chat 与 Responses 转换器有重复</strong>(tool_choice 部分)</li>
|
||
<li><strong>增量输出定位失败兜底</strong>(<span class="file">:453-456</span>):rfind=-1 时输出整屏,可能重复</li>
|
||
<li><strong>Docker 不支持资源配置</strong>(CPU/Memory TODO 未完成)</li>
|
||
<li><strong>StatTracker 周期 60s 硬编码</strong>,无配置接口</li>
|
||
<li><strong>无 wandb / mlflow</strong>,仅 tensorboard</li>
|
||
</ol>
|
||
|
||
<h3>💎 可抄片段 6 条(直接能用)</h3>
|
||
<table>
|
||
<tr><th>片段</th><th>位置</th><th>适用场景</th></tr>
|
||
<tr><td>Queue-Mediated 50 行</td><td><span class="file">queue_mediated_client.py:47-50</span></td><td>任何"线性代码 + 外部控制"场景:模拟器、多租户推理、游戏 AI</td></tr>
|
||
<tr><td>三级降级 Parser</td><td><span class="file">json_parser.py:75-99</span></td><td>LLM 输出解析的最佳实践模板</td></tr>
|
||
<tr><td>Tenacity 重试装饰器</td><td><span class="file">chat_completions_compatible.py:44-53</span></td><td>异步 API 客户端通用重试</td></tr>
|
||
<tr><td>线程局部 httpx</td><td><span class="file">chat_completions_compatible.py:22-41</span></td><td>多线程 async 场景规避事件循环冲突</td></tr>
|
||
<tr><td>Janitor atexit</td><td><span class="file">code_env.py:348-389</span></td><td>任何管外部资源(容器、临时文件、远程 session)的系统</td></tr>
|
||
<tr><td>ValueAndFuture</td><td><span class="file">async_utils.py</span></td><td>8 行泛型 dataclass,"值 + 响应 future"原子单位</td></tr>
|
||
</table>
|
||
</section>
|
||
|
||
<section id="comparison">
|
||
<h2><span class="sec-num">§18</span>对标 / 启发</h2>
|
||
<p class="sec-lead">放到 Agent + RL 生态里看,ARES 占哪块地,能给其他项目什么。</p>
|
||
|
||
<h3>vs 其他 Agent RL 框架</h3>
|
||
<table>
|
||
<tr><th></th><th>ARES</th><th>Verl</th><th>OpenPipe Mini-ART</th><th>Gymnasium</th></tr>
|
||
<tr><td>定位</td><td>环境层</td><td>训练器 + 环境</td><td>Fine-tune + eval 一体</td><td>通用 RL 标准</td></tr>
|
||
<tr><td>Agent 支持</td><td>SWE + terminal</td><td>SWE</td><td>多场景</td><td>非 LLM</td></tr>
|
||
<tr><td>沙箱</td><td>Daytona + Docker</td><td>自研</td><td>自研</td><td>无</td></tr>
|
||
<tr><td>拦截机制</td><td>asyncio.Queue + Go proxy</td><td>RPC</td><td>直接调用</td><td>N/A</td></tr>
|
||
<tr><td>可解释性</td><td>mech_interp 附加</td><td>无</td><td>无</td><td>无</td></tr>
|
||
</table>
|
||
|
||
<h3>对标 Manus</h3>
|
||
<div class="highlight-box">
|
||
<h4>边界与重叠</h4>
|
||
<p><strong>Manus = 成品 Agent(应用层)</strong>;<strong>ARES = 训练/评估基础设施(基础设施层)</strong>。角色不同。</p>
|
||
<p style="margin-top:0.8rem;">
|
||
但 <strong>ARES 里的 Agent 运行内核</strong>(<code>terminus2_agent</code> + <code>ares-proxy</code> + Daytona 沙箱)
|
||
<strong>≈ 一个 Manus-like 的 Agent 运行器</strong>。
|
||
</p>
|
||
<p style="margin-bottom:0;">
|
||
把 ARES 的 RL 训练钩子(reward 读取、并发 rollout、gather 聚合)<strong>拆掉</strong>,剩下的部分可以当独立 Agent 运行时复用。
|
||
这是最值得拿的"后半"。
|
||
</p>
|
||
</div>
|
||
|
||
<h3>对个人项目的启发</h3>
|
||
<table>
|
||
<tr><th>项目</th><th>可借鉴</th></tr>
|
||
<tr><td><strong>HiClaw / OpenClaw 魔改</strong></td><td><code>QueueMediatedLLMClient</code> + <code>ares-proxy</code> 的双栈拦截可直接借鉴,给多 Agent 编排做统一观察接口</td></tr>
|
||
<tr><td><strong>手机 GUI Agent</strong></td><td><code>Terminus2Agent</code> 的 tmux 增量输出追踪 + 主动概括策略可迁移到 GUI 长轨迹</td></tr>
|
||
<tr><td><strong>Hermes Personal</strong></td><td><code>ChatCompletionCompatibleLLMClient</code> 的线程局部客户端 + tenacity 重试模板直接抄</td></tr>
|
||
<tr><td><strong>Manus 逆向</strong></td><td>参考 ARES 的 Agent runtime 设计,对比 Manus 公开行为里哪些已经实现、哪些还差</td></tr>
|
||
<tr><td><strong>通用</strong></td><td>Parser 三级降级 + Janitor atexit 属于"看过一次就该用"的基础模式</td></tr>
|
||
</table>
|
||
</section>
|
||
|
||
<section id="reading-path">
|
||
<h2><span class="sec-num">§19</span>阅读路径推荐</h2>
|
||
<p class="sec-lead">想理解 ARES,按以下顺序读最省脑。</p>
|
||
|
||
<ol style="font-size: 0.92rem;">
|
||
<li><code>CLAUDE.md</code> —— 13,139 字节,仓库自带,密度比 README 高 3 倍</li>
|
||
<li><code>src/ares/__init__.py</code> —— 公开 API 清单</li>
|
||
<li><code>src/ares/environments/base.py</code> —— Environment 协议 + TimeStep</li>
|
||
<li><code>src/ares/llms/queue_mediated_client.py</code> —— <strong>50 行核心,看一眼就懂</strong></li>
|
||
<li><code>src/ares/async_utils.py</code> —— ValueAndFuture 抽象</li>
|
||
<li><code>src/ares/environments/code_env.py</code> —— 250 行 RL 主循环</li>
|
||
<li><code>src/ares/code_agents/mini_swe_agent.py</code> —— 简单 Agent</li>
|
||
<li><code>src/ares/containers/docker.py</code> —— 熟悉容器抽象</li>
|
||
<li><code>examples/03_parallel_eval_with_api.py</code> —— 端到端用法</li>
|
||
<li><code>ares-proxy/*.go</code> —— 跨进程版队列中介</li>
|
||
<li><code>src/ares/code_agents/terminus2/terminus2_agent.py</code> —— 1,110 行生产级 Agent</li>
|
||
<li><code>src/ares/contrib/mech_interp/*</code> —— 可解释性加成</li>
|
||
</ol>
|
||
|
||
<div class="callout green">
|
||
<strong>捷径</strong>:如果只有 30 分钟时间,直接读 4 + 5 + 6 + 10(Queue-Mediated + ValueAndFuture + CodeEnvironment + ares-proxy)。这四个文件涵盖了 ARES 最精华的 80%。
|
||
</div>
|
||
</section>
|
||
|
||
<footer>
|
||
<p>
|
||
<strong>解析元信息</strong> · 基于 commit <code>c804aa2</code> · 生成日期 2026-04-24 ·
|
||
共解析 72 个 Python 文件 + 5 个 Go 文件,总 LOC 约 11,900。
|
||
</p>
|
||
<p style="margin-top:0.6rem;">
|
||
<a href="https://github.com/withmartian/ares" target="_blank" rel="noopener noreferrer">上游仓库 →</a>
|
||
·
|
||
本地源码:<code>source/</code>
|
||
·
|
||
主报告:<code>.memory/source-analysis.md</code>
|
||
</p>
|
||
</footer>
|
||
|
||
</main>
|
||
</div>
|
||
|
||
<script>
  // Scroll-spy for the sidebar table of contents: marks the nav link whose
  // target section is currently at (or just above) the top of the viewport.

  // Every in-page link inside <nav> (href starting with "#").
  const links = document.querySelectorAll('nav a[href^="#"]');

  // Resolve each link's target by id. getElementById is used instead of
  // querySelector(href) because querySelector throws a SyntaxError for a bare
  // "#" or for ids that are not valid CSS selectors, which would abort the
  // whole script. Links without a matching element are dropped.
  const sections = Array.from(links)
    .map(link => document.getElementById(link.getAttribute('href').slice(1)))
    .filter(Boolean);

  /**
   * Recompute which section is current and update the nav links.
   *
   * The current section is the last one (in nav order) whose offsetTop is at
   * or above the scroll position plus an 80px lookahead; before any section is
   * reached, the first section is used. The matching link gets the "active"
   * class and aria-current="location"; all other links have both removed.
   */
  function onScroll() {
    const scrollY = window.scrollY + 80;

    let active = sections[0];
    for (const sec of sections) {
      if (sec.offsetTop <= scrollY) active = sec;
    }

    // null when the page has no resolvable sections, so that no link matches.
    const activeHref = active ? '#' + active.id : null;

    links.forEach(link => {
      const isActive = link.getAttribute('href') === activeHref;
      link.classList.toggle('active', isActive);
      // Expose the same state to assistive technology.
      if (isActive) {
        link.setAttribute('aria-current', 'location');
      } else {
        link.removeAttribute('aria-current');
      }
    });
  }

  // Passive listener: the handler never calls preventDefault().
  window.addEventListener('scroll', onScroll, { passive: true });

  // Set the initial highlight before the first scroll event.
  onScroll();
</script>
|
||
|
||
</body>
|
||
</html>
|