Files
cvpr2026-3d-vision/index.html
2026-04-25 19:23:17 +08:00

1151 lines
34 KiB
HTML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>CVPR 2026 - 基础视觉大模型 x 三维视觉</title>
<style>
/* Design tokens shared by every component on the page. */
:root {
  /* Page and panel backgrounds */
  --bg: #08080f;
  --bg2: #0e0e1a;
  --bg3: #161628;

  /* Accent palette */
  --accent: #6366f1;
  --accent2: #06b6d4;
  --accent3: #f43f5e;
  --accent4: #10b981;
  --accent5: #f59e0b;

  /* Primary and secondary text */
  --text: #e2e4f0;
  --text2: #8890b0;

  /* Borders, hover glow and translucent card surface */
  --border: #1e2040;
  --glow: 0 0 30px rgba(99, 102, 241, 0.15);
  --card: rgba(14, 14, 26, 0.8);
}

/* Global reset: no default margins/padding, border-box sizing everywhere. */
* {
  margin: 0;
  padding: 0;
  box-sizing: border-box;
}

/* Page defaults; horizontal overflow is hidden at the body level. */
body {
  font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', system-ui, sans-serif;
  background: var(--bg);
  color: var(--text);
  line-height: 1.7;
  overflow-x: hidden;
}
/* ===== Hero ===== */
/* Full-viewport, centred intro panel. It is the positioning context for the
   gradient overlay (::before) and the grid overlay (.hero-grid). */
.hero {
  min-height: 100vh;
  display: flex;
  flex-direction: column;
  align-items: center;
  justify-content: center;
  text-align: center;
  padding: 2rem;
  position: relative;
  overflow: hidden;
}

/* Three soft radial colour washes layered behind the hero content. */
.hero::before {
  content: '';
  position: absolute;
  inset: 0;
  background:
    radial-gradient(ellipse at 30% 40%, rgba(99, 102, 241, 0.12) 0%, transparent 55%),
    radial-gradient(ellipse at 70% 60%, rgba(6, 182, 212, 0.1) 0%, transparent 55%),
    radial-gradient(ellipse at 50% 90%, rgba(244, 63, 94, 0.06) 0%, transparent 40%);
  pointer-events: none;
}

/* 50px line grid drawn with two 1px linear gradients; its opacity pulses. */
.hero-grid {
  position: absolute;
  inset: 0;
  background-image:
    linear-gradient(rgba(99, 102, 241, 0.04) 1px, transparent 1px),
    linear-gradient(90deg, rgba(99, 102, 241, 0.04) 1px, transparent 1px);
  background-size: 50px 50px;
  pointer-events: none;
  animation: gridPulse 8s ease-in-out infinite;
}

@keyframes gridPulse {
  0%, 100% { opacity: 0.4; }
  50% { opacity: 1; }
}

/* Pill-shaped badge above the title. */
.hero-badge {
  display: inline-flex;
  align-items: center;
  gap: 0.5rem;
  padding: 0.4rem 1.2rem;
  border: 1px solid var(--accent);
  border-radius: 100px;
  font-size: 0.75rem;
  color: var(--accent);
  letter-spacing: 0.12em;
  text-transform: uppercase;
  margin-bottom: 2rem;
  position: relative;
  z-index: 1; /* keep above the absolutely positioned overlays */
  background: rgba(99, 102, 241, 0.06);
}

/* Blinking status dot inside the badge. */
.hero-badge .dot {
  width: 6px;
  height: 6px;
  border-radius: 50%;
  background: var(--accent);
  animation: blink 2s infinite;
}

@keyframes blink {
  0%, 100% { opacity: 1; }
  50% { opacity: 0.3; }
}

/* Fluid title size between 2rem and 3.5rem. */
.hero h1 {
  font-size: clamp(2rem, 5vw, 3.5rem);
  font-weight: 800;
  line-height: 1.2;
  position: relative;
  z-index: 1;
  margin-bottom: 1rem;
}

/* Gradient-filled text: the background is clipped to the glyphs and the
   text fill is made transparent so the gradient shows through. */
.hero h1 .gradient {
  background: linear-gradient(135deg, var(--accent), var(--accent2));
  -webkit-background-clip: text;
  -webkit-text-fill-color: transparent;
  background-clip: text;
}

.hero-sub {
  font-size: 1.1rem;
  color: var(--text2);
  max-width: 600px;
  position: relative;
  z-index: 1;
  margin-bottom: 2.5rem;
}

/* Row of headline numbers; wraps on narrow screens. */
.hero-stats {
  display: flex;
  gap: 2.5rem;
  position: relative;
  z-index: 1;
  flex-wrap: wrap;
  justify-content: center;
}

.hero-stats .stat {
  text-align: center;
}

/* Same gradient-text technique as the title. */
.hero-stats .stat-val {
  font-size: 2rem;
  font-weight: 800;
  background: linear-gradient(135deg, var(--accent), var(--accent2));
  -webkit-background-clip: text;
  -webkit-text-fill-color: transparent;
  background-clip: text;
}

.hero-stats .stat-label {
  font-size: 0.75rem;
  color: var(--text2);
  letter-spacing: 0.08em;
  text-transform: uppercase;
}
/* ===== Sections ===== */
/* Centred content column used inside every section. */
.container {
  max-width: 1200px;
  margin: 0 auto;
  padding: 0 1.5rem;
}

/* Vertical rhythm for every <section>. The hero keeps this top/bottom value
   only until .hero's own `padding: 2rem` (higher specificity) overrides it. */
section {
  padding: 5rem 0;
}

/* Small uppercase eyebrow label above a section title. */
.section-label {
  display: inline-flex;
  align-items: center;
  gap: 0.5rem;
  font-size: 0.75rem;
  color: var(--accent);
  letter-spacing: 0.12em;
  text-transform: uppercase;
  margin-bottom: 1rem;
}

/* 20px accent rule drawn before the label text. */
.section-label::before {
  content: '';
  width: 20px;
  height: 1px;
  background: var(--accent);
}

.section-title {
  font-size: 2rem;
  font-weight: 700;
  margin-bottom: 0.5rem;
}

.section-desc {
  color: var(--text2);
  max-width: 600px;
  margin-bottom: 3rem;
}
/* ===== Project Cards ===== */
/* Auto-fitting grid: as many columns of at least 340px as fit the row. */
.projects-grid {
  display: grid;
  grid-template-columns: repeat(auto-fit, minmax(340px, 1fr));
  gap: 1.5rem;
}

/* Card shell; positioning context for the top accent bar (::before). */
.project-card {
  background: var(--card);
  border: 1px solid var(--border);
  border-radius: 16px;
  padding: 2rem;
  transition: all 0.3s ease;
  position: relative;
  overflow: hidden;
}

.project-card:hover {
  border-color: var(--accent);
  box-shadow: var(--glow);
  transform: translateY(-4px);
}

/* 3px gradient bar along the top edge, hidden until the card is hovered. */
.project-card::before {
  content: '';
  position: absolute;
  top: 0;
  left: 0;
  right: 0;
  height: 3px;
  background: linear-gradient(90deg, var(--accent), var(--accent2));
  opacity: 0;
  transition: opacity 0.3s;
}

.project-card:hover::before {
  opacity: 1;
}

/* Icon on the left, star badge on the right. */
.card-header {
  display: flex;
  align-items: flex-start;
  justify-content: space-between;
  margin-bottom: 1rem;
}

/* Fixed 48px square tile; colours are supplied inline per card. */
.card-icon {
  width: 48px;
  height: 48px;
  border-radius: 12px;
  display: flex;
  align-items: center;
  justify-content: center;
  font-size: 1.5rem;
  flex-shrink: 0;
}

.card-stars {
  display: flex;
  align-items: center;
  gap: 0.3rem;
  font-size: 0.8rem;
  color: var(--accent5);
  background: rgba(245, 158, 11, 0.1);
  padding: 0.25rem 0.6rem;
  border-radius: 100px;
}

.card-name {
  font-size: 1.3rem;
  font-weight: 700;
  margin-bottom: 0.3rem;
}

.card-full-name {
  font-size: 0.78rem;
  color: var(--text2);
  margin-bottom: 0.8rem;
  line-height: 1.5;
}

.card-desc {
  font-size: 0.9rem;
  color: var(--text2);
  margin-bottom: 1.2rem;
  line-height: 1.7;
}

/* Wrapping row of topic pills. */
.card-tags {
  display: flex;
  flex-wrap: wrap;
  gap: 0.4rem;
  margin-bottom: 1.2rem;
}

.tag {
  font-size: 0.7rem;
  padding: 0.2rem 0.6rem;
  border-radius: 100px;
  border: 1px solid var(--border);
  color: var(--text2);
  background: rgba(255, 255, 255, 0.02);
}

/* Accent-coloured variant for the primary tags. */
.tag.highlight {
  border-color: var(--accent);
  color: var(--accent);
  background: rgba(99, 102, 241, 0.08);
}

/* Two-column label/value grid under a divider line. */
.card-specs {
  display: grid;
  grid-template-columns: 1fr 1fr;
  gap: 0.6rem;
  padding-top: 1rem;
  border-top: 1px solid var(--border);
}

.spec {
  display: flex;
  flex-direction: column;
}

.spec-label {
  font-size: 0.65rem;
  color: var(--text2);
  text-transform: uppercase;
  letter-spacing: 0.08em;
}

.spec-val {
  font-size: 0.85rem;
  font-weight: 600;
}

/* Colour modifiers for spec values. */
.spec-val.green { color: var(--accent4); }
.spec-val.yellow { color: var(--accent5); }
.spec-val.red { color: var(--accent3); }
.spec-val.blue { color: var(--accent2); }

/* Outlined button-style link at the bottom of a card; fills on hover. */
.card-link {
  display: inline-flex;
  align-items: center;
  gap: 0.4rem;
  margin-top: 1.2rem;
  padding: 0.5rem 1.2rem;
  border: 1px solid var(--accent);
  border-radius: 8px;
  color: var(--accent);
  text-decoration: none;
  font-size: 0.85rem;
  font-weight: 600;
  transition: all 0.2s;
}

.card-link:hover {
  background: var(--accent);
  color: #fff;
}
/* ===== Architecture Diagram ===== */
/* Rounded panel; also reused in the markup as a generic wrapper for the
   VRAM table and the reference links. */
.arch-section {
  background: var(--bg2);
  border-radius: 20px;
  padding: 3rem;
  margin-bottom: 3rem;
}

/* One pipeline row: nodes and arrows centred, wrapping when space runs out. */
.arch-flow {
  display: flex;
  align-items: center;
  justify-content: center;
  gap: 0.8rem;
  flex-wrap: wrap;
  padding: 2rem 0;
}

.arch-node {
  padding: 0.8rem 1.5rem;
  border-radius: 12px;
  font-size: 0.85rem;
  font-weight: 600;
  text-align: center;
  min-width: 120px;
}

/* Node roles, each with a tinted fill, matching border and text colour. */
.arch-node.input {
  background: rgba(6, 182, 212, 0.15);
  border: 1px solid rgba(6, 182, 212, 0.3);
  color: var(--accent2);
}

.arch-node.process {
  background: rgba(99, 102, 241, 0.15);
  border: 1px solid rgba(99, 102, 241, 0.3);
  color: var(--accent);
}

.arch-node.output {
  background: rgba(16, 185, 129, 0.15);
  border: 1px solid rgba(16, 185, 129, 0.3);
  color: var(--accent4);
}

.arch-arrow {
  color: var(--text2);
  font-size: 1.2rem;
}
/* ===== Hardware Section ===== */
/* Auto-fitting grid with a 280px minimum column width. */
.hw-grid {
  display: grid;
  grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
  gap: 1.5rem;
}

.hw-card {
  background: var(--card);
  border: 1px solid var(--border);
  border-radius: 14px;
  padding: 1.8rem;
  transition: all 0.3s;
}

.hw-card:hover {
  border-color: var(--accent2);
  box-shadow: 0 0 25px rgba(6, 182, 212, 0.1);
}

.hw-card .hw-icon {
  font-size: 2rem;
  margin-bottom: 1rem;
}

.hw-card h3 {
  font-size: 1.1rem;
  margin-bottom: 0.5rem;
}

.hw-card p {
  font-size: 0.85rem;
  color: var(--text2);
  line-height: 1.7;
}

/* Price line separated from the description by a divider. */
.hw-card .hw-price {
  margin-top: 1rem;
  padding-top: 0.8rem;
  border-top: 1px solid var(--border);
  font-size: 1.2rem;
  font-weight: 700;
}

/* Smaller, muted qualifier that follows the price figure. */
.hw-card .hw-price span {
  font-size: 0.75rem;
  color: var(--text2);
  font-weight: 400;
}

/* Highlighted option. !important keeps the green border even while the
   .hw-card:hover rule (higher specificity) sets its own border colour. */
.recommend {
  border-color: var(--accent4) !important;
  position: relative;
}

/* "RECOMMENDED" tab hanging from the card's top edge. */
.recommend::after {
  content: 'RECOMMENDED';
  position: absolute;
  top: -1px;
  right: 20px;
  background: var(--accent4);
  color: #000;
  font-size: 0.6rem;
  font-weight: 700;
  letter-spacing: 0.1em;
  padding: 0.2rem 0.6rem;
  border-radius: 0 0 6px 6px;
}
/* ===== VRAM Table ===== */
.vram-table {
  width: 100%;
  border-collapse: collapse;
  margin-top: 2rem;
}

/* Shared cell geometry for header and body cells. */
.vram-table th,
.vram-table td {
  padding: 1rem 1.2rem;
  text-align: left;
  border-bottom: 1px solid var(--border);
  font-size: 0.9rem;
}

/* Header cells: small, uppercase, muted (overrides the shared font-size). */
.vram-table th {
  font-size: 0.7rem;
  text-transform: uppercase;
  letter-spacing: 0.1em;
  color: var(--text2);
  font-weight: 600;
}

/* Faint row highlight on hover. */
.vram-table tr:hover td {
  background: rgba(99, 102, 241, 0.04);
}

/* Track of the inline usage bar. */
.vram-bar {
  height: 6px;
  border-radius: 3px;
  background: var(--border);
  position: relative;
  overflow: hidden;
  min-width: 120px;
}

/* Filled portion; its width is set inline per row. */
.vram-bar .fill {
  height: 100%;
  border-radius: 3px;
  transition: width 1s ease;
}

/* Fill colour by level. */
.vram-bar .fill.low { background: var(--accent4); }
.vram-bar .fill.mid { background: var(--accent5); }
.vram-bar .fill.high { background: var(--accent3); }
/* ===== Setup Guide ===== */
/* Vertical stack of numbered steps. */
.setup-steps {
  display: flex;
  flex-direction: column;
  gap: 1.5rem;
}

/* One step: number badge on the left, content on the right. */
.step {
  display: flex;
  gap: 1.5rem;
  align-items: flex-start;
}

/* Fixed 40px square number badge; never shrinks. */
.step-num {
  width: 40px;
  height: 40px;
  border-radius: 10px;
  background: rgba(99, 102, 241, 0.12);
  border: 1px solid rgba(99, 102, 241, 0.25);
  display: flex;
  align-items: center;
  justify-content: center;
  font-weight: 700;
  color: var(--accent);
  flex-shrink: 0;
}

.step-content h3 {
  font-size: 1rem;
  margin-bottom: 0.3rem;
}

.step-content p {
  font-size: 0.85rem;
  color: var(--text2);
  line-height: 1.7;
}

/* Monospace command panel; scrolls horizontally when content overflows. */
.code-block {
  background: var(--bg);
  border: 1px solid var(--border);
  border-radius: 10px;
  padding: 1.2rem 1.5rem;
  margin-top: 0.8rem;
  font-family: 'SF Mono', 'Fira Code', monospace;
  font-size: 0.8rem;
  color: var(--accent2);
  overflow-x: auto;
  line-height: 1.8;
}

/* Token colours inside a code block. */
.code-block .comment { color: var(--text2); }
.code-block .cmd { color: var(--accent4); }
/* ===== Timeline ===== */
/* Left padding makes room for the rail and the markers. */
.timeline {
  position: relative;
  padding-left: 2rem;
}

/* Vertical rail running the full height of the timeline. */
.timeline::before {
  content: '';
  position: absolute;
  left: 7px;
  top: 0;
  bottom: 0;
  width: 2px;
  background: var(--border);
}

.timeline-item {
  position: relative;
  padding-bottom: 2rem;
}

/* Default marker: hollow accent ring placed in the left padding. */
.timeline-item::before {
  content: '';
  position: absolute;
  left: -2rem;
  top: 6px;
  width: 12px;
  height: 12px;
  border-radius: 50%;
  border: 2px solid var(--accent);
  background: var(--bg);
}

/* Completed step: solid marker in --accent4. */
.timeline-item.done::before {
  background: var(--accent4);
  border-color: var(--accent4);
}

/* Current step: solid marker in --accent5 with a glow. */
.timeline-item.current::before {
  background: var(--accent5);
  border-color: var(--accent5);
  box-shadow: 0 0 10px rgba(245, 158, 11, 0.4);
}

.timeline-item h4 {
  font-size: 0.95rem;
  margin-bottom: 0.2rem;
}

.timeline-item p {
  font-size: 0.82rem;
  color: var(--text2);
}

.timeline-item .date {
  font-size: 0.7rem;
  color: var(--accent);
  margin-bottom: 0.3rem;
}
/* ===== Footer ===== */
/* Centred, muted footer separated from the page by a top border. */
footer {
  text-align: center;
  padding: 3rem;
  color: var(--text2);
  font-size: 0.8rem;
  border-top: 1px solid var(--border);
}

/* Footer links: accent colour, no underline. */
footer a {
  color: var(--accent);
  text-decoration: none;
}
/* ===== Responsive ===== */
/* Narrow screens (768px and below): single-column grids, vertical pipeline
   rows with arrows turned to point down, and a more compact table. */
@media (max-width: 768px) {
  .projects-grid {
    grid-template-columns: 1fr;
  }

  .hw-grid {
    grid-template-columns: 1fr;
  }

  .hero-stats {
    gap: 1.5rem;
  }

  .arch-flow {
    flex-direction: column;
  }

  .arch-arrow {
    transform: rotate(90deg);
  }

  .vram-table {
    font-size: 0.8rem;
  }

  .vram-table th,
  .vram-table td {
    padding: 0.7rem;
  }
}
/* ===== Floating 3D Particles ===== */
/* Fixed full-viewport layer; it ignores pointer events so it never blocks
   interaction with the page. */
.particles {
  position: fixed;
  inset: 0;
  pointer-events: none;
  z-index: 0;
  overflow: hidden;
}

/* A single 3px dot. The animation duration and delay are not set here, so
   they must be supplied per element (e.g. via inline styles). */
.particle {
  position: absolute;
  width: 3px;
  height: 3px;
  border-radius: 50%;
  background: var(--accent);
  opacity: 0.15;
  animation: float linear infinite;
}

/* Rise from below the viewport to just above it, drifting 30px to the right
   and fading in and out at the two ends of the path. */
@keyframes float {
  0% {
    transform: translateY(100vh) translateX(0);
    opacity: 0;
  }
  10% {
    opacity: 0.15;
  }
  90% {
    opacity: 0.15;
  }
  100% {
    transform: translateY(-10vh) translateX(30px);
    opacity: 0;
  }
}
/* ===== Scroll Animations ===== */
/* Reveal-on-scroll: an element with .fade-in stays transparent and shifted
   30px down until the .visible class is added to it, then eases into place.

   The hidden state is scoped to :not(.visible) instead of being undone by a
   separate `.fade-in.visible { transform: translateY(0) }` rule. That rule
   had the same specificity as `.project-card:hover` and came later in the
   stylesheet, so it overrode the hover transform and the card lift never
   happened once a card had been revealed.

   NOTE: this transition is declared after `.project-card` / `.hw-card` with
   equal specificity, so 0.6s is also the duration of their hover effects. */
.fade-in {
  transition: all 0.6s ease;
}

.fade-in:not(.visible) {
  opacity: 0;
  transform: translateY(30px);
}

/* Honour the OS-level "reduce motion" preference: reveal without movement
   and switch off the infinite decorative animations. */
@media (prefers-reduced-motion: reduce) {
  .fade-in {
    transition: none;
  }

  .fade-in:not(.visible) {
    transform: none;
  }

  .hero-grid,
  .hero-badge .dot,
  .particle {
    animation: none;
  }

  /* Without their animation the particles would sit as static dots. */
  .particles {
    display: none;
  }
}
</style>
</head>
<body>
<!-- Particles: empty layer that the inline script at the end of the page fills with .particle dots -->
<div class="particles" id="particles"></div>

<!-- Hero: conference badge, page title, summary and headline numbers -->
<section class="hero">
  <!-- Decorative animated grid overlay -->
  <div class="hero-grid"></div>

  <div class="hero-badge"><span class="dot"></span> CVPR 2026 - Denver, CO - June 3-7</div>
  <h1><span class="gradient">Foundation Vision Models</span><br>x 3D Vision</h1>
  <p class="hero-sub">基础视觉大模型在三维视觉领域展现出惊人潜力。精选 CVPR 2026 热门开源项目,从统一 3D 重建到空间推理,涵盖前沿方向。</p>

  <!-- Headline numbers -->
  <div class="hero-stats">
    <div class="stat">
      <div class="stat-val">16,092</div>
      <div class="stat-label">Total Submissions</div>
    </div>
    <div class="stat">
      <div class="stat-val">4,090</div>
      <div class="stat-label">Accepted Papers</div>
    </div>
    <div class="stat">
      <div class="stat-val">25.4%</div>
      <div class="stat-label">Acceptance Rate</div>
    </div>
    <div class="stat">
      <div class="stat-val">4</div>
      <div class="stat-label">Projects Tracked</div>
    </div>
  </div>
</section>
<!-- Projects: one card per tracked repository.
     Each card has a header (icon + star count), title, full paper title, summary,
     topic tags, a 2x2 spec grid and a link to the GitHub repository.
     Links open in a new tab, so they carry rel="noopener noreferrer" and an
     aria-label that names the project (the visible text is the same on every card). -->
<section>
  <div class="container">
    <div class="section-label">Core Projects</div>
    <h2 class="section-title">3D Vision Foundation Models</h2>
    <p class="section-desc">精选 4 个 CVPR 2026 开源项目,聚焦基础视觉大模型在三维重建、空间推理和几何学习上的突破。</p>
    <div class="projects-grid">

      <!-- G2VLM -->
      <div class="project-card fade-in">
        <div class="card-header">
          <div class="card-icon" style="background: rgba(99, 102, 241, 0.12); color: var(--accent);">3D</div>
          <div class="card-stars">&#9733; 291</div>
        </div>
        <h3 class="card-name">G2VLM</h3>
        <div class="card-full-name">Geometry Grounded Vision Language Model with Unified 3D Reconstruction and Spatial Reasoning</div>
        <div class="card-desc">统一几何感知专家(3D 重建)和语义感知专家(多模态理解)。输入图片即可生成 3D 点云并支持交错推理的空间问答。2B 参数 MoT 架构。</div>
        <div class="card-tags">
          <span class="tag highlight">3D Reconstruction</span>
          <span class="tag highlight">Spatial Reasoning</span>
          <span class="tag">VLM</span>
          <span class="tag">Point Cloud</span>
          <span class="tag">MoT</span>
        </div>
        <div class="card-specs">
          <div class="spec">
            <div class="spec-label">Parameters</div>
            <div class="spec-val blue">2B</div>
          </div>
          <div class="spec">
            <div class="spec-label">Min VRAM</div>
            <div class="spec-val green">~8 GB</div>
          </div>
          <div class="spec">
            <div class="spec-label">Institution</div>
            <div class="spec-val">Shanghai AI Lab / UCLA / ZJU</div>
          </div>
          <div class="spec">
            <div class="spec-label">License</div>
            <div class="spec-val green">Apache 2.0</div>
          </div>
        </div>
        <a class="card-link" href="https://github.com/InternRobotics/G2VLM" target="_blank" rel="noopener noreferrer" aria-label="G2VLM on GitHub (opens in new tab)">GitHub &#8599;</a>
      </div>

      <!-- VLM-3R -->
      <div class="project-card fade-in">
        <div class="card-header">
          <div class="card-icon" style="background: rgba(6, 182, 212, 0.12); color: var(--accent2);">VR</div>
          <div class="card-stars">&#9733; 363</div>
        </div>
        <h3 class="card-name">VLM-3R</h3>
        <div class="card-full-name">Vision-Language Models Augmented with Instruction-Aligned 3D Reconstruction</div>
        <div class="card-desc">统一 VLM 框架,整合 3D 重建指令微调,实现单目视频的深度空间理解。无需外部深度传感器或预构建 3D 地图,基于 LLaVA-NeXT-Video-7B。</div>
        <div class="card-tags">
          <span class="tag highlight">Monocular Video</span>
          <span class="tag highlight">Instruction Tuning</span>
          <span class="tag">VLM</span>
          <span class="tag">Depth-Free</span>
          <span class="tag">LLaVA</span>
        </div>
        <div class="card-specs">
          <div class="spec">
            <div class="spec-label">Parameters</div>
            <div class="spec-val blue">7B</div>
          </div>
          <div class="spec">
            <div class="spec-label">Min VRAM</div>
            <div class="spec-val yellow">~16 GB</div>
          </div>
          <div class="spec">
            <div class="spec-label">Institution</div>
            <div class="spec-val">UT Austin / Meta / UCLA</div>
          </div>
          <div class="spec">
            <div class="spec-label">Backbone</div>
            <div class="spec-val">LLaVA-NeXT + CUT3R</div>
          </div>
        </div>
        <a class="card-link" href="https://github.com/VITA-Group/VLM-3R" target="_blank" rel="noopener noreferrer" aria-label="VLM-3R on GitHub (opens in new tab)">GitHub &#8599;</a>
      </div>

      <!-- TALO -->
      <div class="project-card fade-in">
        <div class="card-header">
          <div class="card-icon" style="background: rgba(16, 185, 129, 0.12); color: var(--accent4);">TA</div>
          <div class="card-stars">&#9733; 56</div>
        </div>
        <h3 class="card-name">TALO</h3>
        <div class="card-full-name">Pushing 3D Vision Foundation Models Towards Globally Consistent Online Reconstruction</div>
        <div class="card-desc">即插即用框架,修正 3D 视觉基础模型中的空间几何不一致性,实现全局一致的在线 3D 重建。兼容 VGGT、Pi3、MapAnything 等主流骨干网络。</div>
        <div class="card-tags">
          <span class="tag highlight">Plug-and-Play</span>
          <span class="tag highlight">Global Consistency</span>
          <span class="tag">Online Recon</span>
          <span class="tag">VGGT</span>
          <span class="tag">SLAM</span>
        </div>
        <div class="card-specs">
          <div class="spec">
            <div class="spec-label">Type</div>
            <div class="spec-val blue">Framework</div>
          </div>
          <div class="spec">
            <div class="spec-label">Min VRAM</div>
            <div class="spec-val yellow">~12 GB</div>
          </div>
          <div class="spec">
            <div class="spec-label">Compatible</div>
            <div class="spec-val">VGGT / Pi3 / MapAnything</div>
          </div>
          <div class="spec">
            <div class="spec-label">Platform</div>
            <div class="spec-val red">Linux + CUDA</div>
          </div>
        </div>
        <a class="card-link" href="https://github.com/Xian-Bei/TALO" target="_blank" rel="noopener noreferrer" aria-label="TALO on GitHub (opens in new tab)">GitHub &#8599;</a>
      </div>

      <!-- Flow3r -->
      <div class="project-card fade-in">
        <div class="card-header">
          <div class="card-icon" style="background: rgba(244, 63, 94, 0.12); color: var(--accent3);">F3</div>
          <div class="card-stars">&#9733; 60</div>
        </div>
        <h3 class="card-name">Flow3r</h3>
        <div class="card-full-name">Factored Flow Prediction for Scalable Visual Geometry Learning</div>
        <div class="card-desc">通过分解化光流预测增强视觉几何学习,利用无标注单目视频的密集 2D 对应关系作为监督信号。在 8 个 benchmark 上达到 SOTA。</div>
        <div class="card-tags">
          <span class="tag highlight">Optical Flow</span>
          <span class="tag highlight">8x SOTA</span>
          <span class="tag">Geometry Learning</span>
          <span class="tag">Self-Supervised</span>
          <span class="tag">Scalable</span>
        </div>
        <div class="card-specs">
          <div class="spec">
            <div class="spec-label">Type</div>
            <div class="spec-val blue">Model</div>
          </div>
          <div class="spec">
            <div class="spec-label">Min VRAM</div>
            <div class="spec-val green">~8 GB</div>
          </div>
          <div class="spec">
            <div class="spec-label">Supervision</div>
            <div class="spec-val">Unlabeled Video</div>
          </div>
          <div class="spec">
            <div class="spec-label">Benchmarks</div>
            <div class="spec-val green">8 SOTA</div>
          </div>
        </div>
        <a class="card-link" href="https://github.com/Kidrauh/flow3r" target="_blank" rel="noopener noreferrer" aria-label="Flow3r on GitHub (opens in new tab)">GitHub &#8599;</a>
      </div>

    </div>
  </div>
</section>
<!-- Architecture: two pipeline rows (image -> 3D output, text query -> answer).
     The arrow glyphs are purely visual connectors, so they are hidden from
     assistive technology; the node order already conveys the flow. -->
<section>
  <div class="container">
    <div class="section-label">Typical Pipeline</div>
    <h2 class="section-title">3D Vision Foundation Model Pipeline</h2>
    <p class="section-desc">这些项目的共同架构模式:视觉编码 + 几何感知 + 语义理解。</p>
    <div class="arch-section fade-in">

      <!-- Visual / geometry branch -->
      <div class="arch-flow">
        <div class="arch-node input">RGB Image<br><small>/ Video</small></div>
        <div class="arch-arrow" aria-hidden="true">&#10132;</div>
        <div class="arch-node process">Visual Encoder<br><small>ViT / DINOv2</small></div>
        <div class="arch-arrow" aria-hidden="true">&#10132;</div>
        <div class="arch-node process">Geometry Expert<br><small>3D Reconstruction</small></div>
        <div class="arch-arrow" aria-hidden="true">&#10132;</div>
        <div class="arch-node output">3D Output<br><small>Point Cloud / GS</small></div>
      </div>

      <!-- Language / semantic branch -->
      <div class="arch-flow" style="margin-top: 1rem;">
        <div class="arch-node input">Text Query<br><small>Spatial Q&amp;A</small></div>
        <div class="arch-arrow" aria-hidden="true">&#10132;</div>
        <div class="arch-node process">LLM Backbone<br><small>Qwen2 / LLaMA</small></div>
        <div class="arch-arrow" aria-hidden="true">&#10132;</div>
        <div class="arch-node process">Semantic Expert<br><small>Spatial Reasoning</small></div>
        <div class="arch-arrow" aria-hidden="true">&#10132;</div>
        <div class="arch-node output">Answer<br><small>+ 3D Grounding</small></div>
      </div>

    </div>
  </div>
</section>
<!-- VRAM Requirements: comparison table, one row per project.
     Column headers carry scope="col" so each data cell is associated with its
     header. The coloured bar under each "Min VRAM" value repeats the number
     visually (width = share of 24 GB), so it is hidden from assistive technology. -->
<section>
  <div class="container">
    <div class="section-label">Hardware</div>
    <h2 class="section-title">VRAM Requirements Comparison</h2>
    <p class="section-desc">所有项目均需 NVIDIA CUDA GPU。RTX 4090 (24GB) 可覆盖全部。</p>
    <div class="arch-section fade-in">
      <table class="vram-table">
        <thead>
          <tr>
            <th scope="col">Project</th>
            <th scope="col">Params</th>
            <th scope="col">Min VRAM</th>
            <th scope="col">Recommended</th>
            <th scope="col">Usage</th>
          </tr>
        </thead>
        <tbody>
          <tr>
            <td><strong>G2VLM</strong></td>
            <td>2B</td>
            <td>
              ~8 GB
              <div class="vram-bar" aria-hidden="true"><div class="fill low" style="width: 33%;"></div></div>
            </td>
            <td>16 GB+</td>
            <td>RTX 3090 / 4070Ti</td>
          </tr>
          <tr>
            <td><strong>VLM-3R</strong></td>
            <td>7B</td>
            <td>
              ~16 GB
              <div class="vram-bar" aria-hidden="true"><div class="fill mid" style="width: 67%;"></div></div>
            </td>
            <td>24 GB</td>
            <td>RTX 4090 / A100</td>
          </tr>
          <tr>
            <td><strong>TALO</strong></td>
            <td>-</td>
            <td>
              ~12 GB
              <div class="vram-bar" aria-hidden="true"><div class="fill mid" style="width: 50%;"></div></div>
            </td>
            <td>24 GB</td>
            <td>RTX 4090 / A100</td>
          </tr>
          <tr>
            <td><strong>Flow3r</strong></td>
            <td>-</td>
            <td>
              ~8 GB
              <div class="vram-bar" aria-hidden="true"><div class="fill low" style="width: 33%;"></div></div>
            </td>
            <td>16 GB+</td>
            <td>RTX 3090 / 4070Ti</td>
          </tr>
        </tbody>
      </table>
    </div>
  </div>
</section>
<!-- Hardware Solutions: three ways to get access to an NVIDIA GPU.
     The icon glyphs are decorative (each card has a text heading), so they are
     hidden from assistive technology. The card with class "recommend" gets the
     "RECOMMENDED" tab from the stylesheet. -->
<section>
  <div class="container">
    <div class="section-label">Solutions</div>
    <h2 class="section-title">Local Hardware Options</h2>
    <p class="section-desc">macOS 不支持 NVIDIA eGPU 驱动。以下是可行的本地运行方案。</p>
    <div class="hw-grid">

      <!-- Option 1: rent a GPU -->
      <div class="hw-card fade-in">
        <div class="hw-icon" aria-hidden="true">&#9729;</div>
        <h3>Cloud GPU (Zero Hardware)</h3>
        <p>Google Colab Pro+ / RunPod / Vast.ai<br>A100 40GB 或 4090 24GB 按需租用<br>适合偶尔跑实验,无前期投入</p>
        <div class="hw-price">~&#165;3<span>/hour (4090) | ~&#165;150/month (Colab Pro+)</span></div>
      </div>

      <!-- Option 2: dedicated Linux box -->
      <div class="hw-card recommend fade-in">
        <div class="hw-icon" aria-hidden="true">&#128421;</div>
        <h3>Linux Mini-ITX + RTX 4090</h3>
        <p>独立 Linux 小主机方案<br>mini-ITX 机箱 + RTX 4090 24GB + Ubuntu<br>从 Mac SSH 远程操作,兼容性最佳</p>
        <div class="hw-price">~&#165;15,000<span> (主机 3K + 4090 12K)</span></div>
      </div>

      <!-- Option 3: external GPU enclosure -->
      <div class="hw-card fade-in">
        <div class="hw-icon" aria-hidden="true">&#128268;</div>
        <h3>eGPU + Linux (Intel Mac Only)</h3>
        <p>雷电 eGPU 盒子 + RTX 4090<br>外置 SSD 装 Ubuntu,开机选系统启动<br>仅限 Intel Mac,Apple Silicon 不支持</p>
        <div class="hw-price">~&#165;13,500<span> (盒子 1.5K + 4090 12K)</span></div>
      </div>

    </div>
  </div>
</section>
<!-- Setup Guide: three numbered steps, each with a block of shell commands.
     The command blocks are <pre> elements so that every command stays on its
     own line; in a plain <div> the line breaks between the <span>s collapse to
     spaces and all commands run together. Because whitespace inside <pre> is
     rendered literally, the lines inside each block must stay unindented and
     the closing tag must follow the last command directly. -->
<section>
  <div class="container">
    <div class="section-label">Quick Start</div>
    <h2 class="section-title">Environment Setup Guide</h2>
    <p class="section-desc">显卡到位后的通用环境搭建步骤(以 Ubuntu + RTX 4090 为例)。</p>
    <div class="setup-steps fade-in">

      <div class="step">
        <div class="step-num">1</div>
        <div class="step-content">
          <h3>Install NVIDIA Driver + CUDA 12.1</h3>
          <p>安装 NVIDIA 驱动和 CUDA Toolkit。</p>
          <pre class="code-block"><span class="comment"># Install NVIDIA driver</span>
<span class="cmd">sudo apt update &amp;&amp; sudo apt install -y nvidia-driver-550</span>
<span class="cmd">sudo reboot</span>
<span class="comment"># Verify GPU</span>
<span class="cmd">nvidia-smi</span>
<span class="comment"># Install CUDA 12.1</span>
<span class="cmd">wget https://developer.download.nvidia.com/compute/cuda/12.1.0/local_installers/cuda_12.1.0_530.30.02_linux.run</span>
<span class="cmd">sudo sh cuda_12.1.0_530.30.02_linux.run</span></pre>
        </div>
      </div>

      <div class="step">
        <div class="step-num">2</div>
        <div class="step-content">
          <h3>Setup Conda + PyTorch</h3>
          <p>创建 Python 环境并安装 CUDA 版 PyTorch。</p>
          <pre class="code-block"><span class="comment"># Install Miniconda</span>
<span class="cmd">wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh</span>
<span class="cmd">bash Miniconda3-latest-Linux-x86_64.sh</span>
<span class="comment"># Create env</span>
<span class="cmd">conda create -n cvpr3d python=3.10 -y</span>
<span class="cmd">conda activate cvpr3d</span>
<span class="comment"># PyTorch + CUDA 12.1</span>
<span class="cmd">pip install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/cu121</span></pre>
        </div>
      </div>

      <div class="step">
        <div class="step-num">3</div>
        <div class="step-content">
          <h3>Clone &amp; Run Projects</h3>
          <p>克隆项目并按照各自 README 安装依赖。</p>
          <pre class="code-block"><span class="comment"># G2VLM (2B, ~8GB VRAM)</span>
<span class="cmd">git clone https://github.com/InternRobotics/G2VLM &amp;&amp; cd G2VLM</span>
<span class="cmd">pip install -r requirements.txt</span>
<span class="cmd">python inference_recon.py</span> <span class="comment"># 3D reconstruction</span>
<span class="cmd">python inference_chat.py</span> <span class="comment"># spatial reasoning</span>
<span class="comment"># VLM-3R (7B, ~16GB VRAM)</span>
<span class="cmd">git clone https://github.com/VITA-Group/VLM-3R &amp;&amp; cd VLM-3R</span>
<span class="cmd">pip install -r requirements.txt</span>
<span class="comment"># TALO (plug-and-play, ~12GB VRAM)</span>
<span class="cmd">git clone https://github.com/Xian-Bei/TALO &amp;&amp; cd TALO</span>
<span class="comment"># Flow3r (~8GB VRAM, 8x SOTA)</span>
<span class="cmd">git clone https://github.com/Kidrauh/flow3r &amp;&amp; cd flow3r</span></pre>
        </div>
      </div>

    </div>
  </div>
</section>
<!-- Timeline: personal roadmap. Item state is expressed by class:
     "done" = completed, "current" = in progress, no modifier = upcoming.
     Concrete dates are wrapped in <time datetime> so they are machine-readable;
     "Pending" / "TBD" are not dates and stay as plain text. -->
<section>
  <div class="container">
    <div class="section-label">Roadmap</div>
    <h2 class="section-title">Personal Progress</h2>
    <p class="section-desc">从调研到本地运行的计划时间线。</p>
    <div class="timeline fade-in">
      <div class="timeline-item done">
        <div class="date"><time datetime="2026-03-19">2026-03-19</time></div>
        <h4>Project Survey Complete</h4>
        <p>完成 CVPR 2026 3D Vision Foundation Model 调研,筛选出 G2VLM / VLM-3R / TALO / Flow3r 四个项目。</p>
      </div>
      <div class="timeline-item done">
        <div class="date"><time datetime="2026-03-19">2026-03-19</time></div>
        <h4>Research Page Built</h4>
        <p>研究展示页面上线,记录项目详情、硬件需求、环境搭建指南。</p>
      </div>
      <div class="timeline-item current">
        <div class="date">Pending</div>
        <h4>GPU Hardware Arrives</h4>
        <p>等待 NVIDIA 显卡到位。推荐 RTX 4090 (24GB),可覆盖全部 4 个项目。</p>
      </div>
      <div class="timeline-item">
        <div class="date">TBD</div>
        <h4>Environment Setup</h4>
        <p>Ubuntu + CUDA 12.1 + PyTorch 2.5.1 环境搭建。</p>
      </div>
      <div class="timeline-item">
        <div class="date">TBD</div>
        <h4>Run Experiments</h4>
        <p>逐个运行 G2VLM → Flow3r → TALO → VLM-3R,从低显存到高显存依次验证。</p>
      </div>
    </div>
  </div>
</section>
<!-- Resources: grid of external reference links.
     Every link opens in a new tab, so each carries rel="noopener noreferrer"
     to keep the opened page from reaching back through window.opener. -->
<section>
  <div class="container">
    <div class="section-label">Resources</div>
    <h2 class="section-title">Reference Links</h2>
    <div class="arch-section fade-in" style="display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 1rem;">
      <a href="https://github.com/amusi/CVPR2026-Papers-with-Code" target="_blank" rel="noopener noreferrer" style="color: var(--accent); text-decoration: none; padding: 1rem; border: 1px solid var(--border); border-radius: 10px; transition: all 0.2s;">
        <strong style="display: block; margin-bottom: 0.3rem;">CVPR2026-Papers-with-Code</strong>
        <span style="font-size: 0.8rem; color: var(--text2);">Master list of all accepted papers with code</span>
      </a>
      <a href="https://github.com/Paper2Chinese/CVPR-2026-reading-papers-with-code" target="_blank" rel="noopener noreferrer" style="color: var(--accent2); text-decoration: none; padding: 1rem; border: 1px solid var(--border); border-radius: 10px; transition: all 0.2s;">
        <strong style="display: block; margin-bottom: 0.3rem;">Paper2Chinese</strong>
        <span style="font-size: 0.8rem; color: var(--text2);">Chinese reading group with code links</span>
      </a>
      <a href="https://openreview.net/group?id=thecvf.com%2FCVPR%2F2026%2FConference" target="_blank" rel="noopener noreferrer" style="color: var(--accent4); text-decoration: none; padding: 1rem; border: 1px solid var(--border); border-radius: 10px; transition: all 0.2s;">
        <strong style="display: block; margin-bottom: 0.3rem;">CVPR 2026 OpenReview</strong>
        <span style="font-size: 0.8rem; color: var(--text2);">Official accepted paper list &amp; reviews</span>
      </a>
      <a href="https://encord.com/cvpr-2026/" target="_blank" rel="noopener noreferrer" style="color: var(--accent5); text-decoration: none; padding: 1rem; border: 1px solid var(--border); border-radius: 10px; transition: all 0.2s;">
        <strong style="display: block; margin-bottom: 0.3rem;">CVPR 2026 Trends (Encord)</strong>
        <span style="font-size: 0.8rem; color: var(--text2);">Trend analysis: 3D grounding, diffusion, world models</span>
      </a>
    </div>
  </div>
</section>
<!-- Footer: page title, build date and a link to the conference site -->
<footer>
  CVPR 2026 3D Vision Foundation Models Research &mdash; Built 2026-03-19<br>
  <a href="https://cvpr.thecvf.com/Conferences/2026">cvpr.thecvf.com</a> &middot; Port 4070
</footer>
<script>
// Particles
const pc = document.getElementById('particles');
for (let i = 0; i < 30; i++) {
const p = document.createElement('div');
p.className = 'particle';
p.style.left = Math.random() * 100 + '%';
p.style.animationDuration = (8 + Math.random() * 12) + 's';
p.style.animationDelay = Math.random() * 10 + 's';
p.style.background = ['#6366f1', '#06b6d4', '#f43f5e', '#10b981'][Math.floor(Math.random() * 4)];
pc.appendChild(p);
}
// Scroll fade-in
const obs = new IntersectionObserver((entries) => {
entries.forEach(e => { if (e.isIntersecting) e.target.classList.add('visible'); });
}, { threshold: 0.1 });
document.querySelectorAll('.fade-in').forEach(el => obs.observe(el));
</script>
</body>
</html>