Compare commits

...

2 Commits

Author SHA1 Message Date
3111c854c5 feat(api): A3 backend skeleton with FastAPI + SQLModel
- 3 tables: Meeting / TranscriptSegment / Summary (with state machine)
- Routes: /api/upload-url + /api/upload-complete + meetings CRUD
- MinIO presigned PUT for direct browser upload
- BackgroundTasks state-machine stub for A5 to flesh out
- SQLite for local dev, PostgreSQL+asyncpg for prod
- CORS configured for frontend on 4490

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-13 19:14:16 +08:00
4f064bb470 auto-save 2026-04-13 19:12 (+1) 2026-04-13 19:12:14 +08:00
15 changed files with 469 additions and 0 deletions

25
api/.env.example Normal file
View File

@@ -0,0 +1,25 @@
# MeetNote API config
# 数据库 - 本地 SQLite，生产 PostgreSQL
DATABASE_URL=sqlite+aiosqlite:///./meetnote.db
# DATABASE_URL=postgresql+asyncpg://user:pass@127.0.0.1:5432/meetnote
# 对象存储 (MinIO)
MINIO_ENDPOINT=127.0.0.1:9000
MINIO_REGION=us-east-1
MINIO_BUCKET=meetnote
MINIO_ACCESS_KEY=minioadmin
MINIO_SECRET_KEY=minioadmin
MINIO_SECURE=false
MINIO_PUBLIC_ENDPOINT=http://127.0.0.1:9000
# Groq Whisper
GROQ_API_KEY=
GROQ_MODEL=whisper-large-v3
# Poe Claude (复用全局)
POE_API_KEY=
POE_MODEL=Claude-Sonnet-4.6
# CORS
CORS_ORIGINS=http://localhost:4490,http://192.168.2.69:4490

6
api/.gitignore vendored Normal file
View File

@@ -0,0 +1,6 @@
.venv/
__pycache__/
*.pyc
.env
*.db
*.db-journal

37
api/README.md Normal file
View File

@@ -0,0 +1,37 @@
# MeetNote API
FastAPI 后端,端口 4491。
## 启动
```bash
python3 -m venv .venv
source .venv/bin/activate
pip install -r requirements.txt
cp .env.example .env # 填 GROQ_API_KEY / POE_API_KEY
uvicorn app.main:app --reload --port 4491
```
## 路由
- `GET /health`
- `POST /api/upload-url` — 申请 presigned URL 并创建会议
- `POST /api/upload-complete` — 通知后端上传完成,触发后台处理
- `GET /api/meetings` — 会议列表
- `GET /api/meetings/{id}` — 单个会议
- `GET /api/meetings/{id}/transcript`
- `GET /api/meetings/{id}/summary`
- `DELETE /api/meetings/{id}`
## 数据库
- 默认 `sqlite+aiosqlite:///./meetnote.db`(本地开发)
- 生产改 `DATABASE_URL=postgresql+asyncpg://...`
## 状态机
`pending → uploading → uploaded → splitting → transcribing → summarizing → done | failed`
## A3 vs A5
A3（本提交）只搭好骨架 + state-machine stub。A5 会接上：
- ffmpeg silencedetect 切片
- Groq Whisper 真实转写
- Poe Claude 真实总结

0
api/app/__init__.py Normal file
View File

30
api/app/config.py Normal file
View File

@@ -0,0 +1,30 @@
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Runtime configuration, populated from the environment and ``.env``."""

    # Keys present in .env that are not declared below are ignored.
    model_config = SettingsConfigDict(env_file=".env", extra="ignore")

    # Database
    database_url: str = "sqlite+aiosqlite:///./meetnote.db"

    # Object storage (MinIO)
    minio_endpoint: str = "127.0.0.1:9000"
    minio_region: str = "us-east-1"
    minio_bucket: str = "meetnote"
    minio_access_key: str = "minioadmin"
    minio_secret_key: str = "minioadmin"
    minio_secure: bool = False
    minio_public_endpoint: str = "http://127.0.0.1:9000"

    # Transcription (Groq Whisper)
    groq_api_key: str = ""
    groq_model: str = "whisper-large-v3"

    # Summarisation (Poe)
    poe_api_key: str = ""
    poe_model: str = "Claude-Sonnet-4.6"

    # Comma-separated list of allowed browser origins.
    cors_origins: str = "http://localhost:4490"

    @property
    def cors_origins_list(self) -> list[str]:
        """Return ``cors_origins`` split on commas, trimmed, blanks dropped."""
        origins: list[str] = []
        for raw in self.cors_origins.split(","):
            origin = raw.strip()
            if origin:
                origins.append(origin)
        return origins


settings = Settings()

19
api/app/db.py Normal file
View File

@@ -0,0 +1,19 @@
from collections.abc import AsyncIterator

from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker, AsyncSession
from sqlmodel import SQLModel

from .config import settings
# Process-wide async engine; the backend is selected by settings.database_url.
engine = create_async_engine(settings.database_url, echo=False, future=True)
# Session factory. expire_on_commit=False keeps already-loaded attributes
# readable after commit(), so handlers can return ORM objects directly.
AsyncSessionLocal = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
async def init_db() -> None:
    """Create every table registered on ``SQLModel.metadata``."""
    # Imported only for its side effect: defining the model classes
    # registers their tables on the shared metadata.
    from . import models  # noqa: F401

    async with engine.begin() as connection:
        await connection.run_sync(SQLModel.metadata.create_all)
async def get_session() -> AsyncIterator[AsyncSession]:
    """Yield one ``AsyncSession`` for use as a FastAPI dependency.

    The function is an async generator, so its return annotation is an
    iterator of sessions rather than a session. The session is opened from
    ``AsyncSessionLocal`` and closed when the ``async with`` block exits.

    Yields:
        The session bound to the module-level engine.
    """
    async with AsyncSessionLocal() as session:
        yield session

31
api/app/main.py Normal file
View File

@@ -0,0 +1,31 @@
from contextlib import asynccontextmanager
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from .config import settings
from .db import init_db
from .routers import meetings as meetings_router
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: initialise the database, then hand over."""
    # Code before `yield` runs at startup; nothing is done after it.
    await init_db()
    yield
# Application instance; `lifespan` calls init_db() as part of startup.
app = FastAPI(title="MeetNote API", version="0.1.0", lifespan=lifespan)
# CORS: only the origins listed in settings.cors_origins are allowed, with
# credentials, and with any method or header.
app.add_middleware(
    CORSMiddleware,
    allow_origins=settings.cors_origins_list,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Mount the meeting routes defined in routers/meetings.py.
app.include_router(meetings_router.router)
@app.get("/health")
async def health():
    """Liveness probe returning a static service-identity payload."""
    payload = {"ok": True, "service": "meetnote-api", "version": "0.1.0"}
    return payload

51
api/app/models.py Normal file
View File

@@ -0,0 +1,51 @@
from datetime import datetime
from enum import Enum
from typing import Optional
from sqlmodel import SQLModel, Field, Column
from sqlalchemy import JSON
class MeetingStatus(str, Enum):
    """Processing states of a meeting; each member's value equals its name."""

    # Upload phase
    pending = "pending"
    uploading = "uploading"
    uploaded = "uploaded"

    # Processing phase
    splitting = "splitting"
    transcribing = "transcribing"
    summarizing = "summarizing"

    # Terminal states
    done = "done"
    failed = "failed"
class Meeting(SQLModel, table=True):
    """One uploaded recording and its processing state."""

    id: Optional[int] = Field(default=None, primary_key=True)
    title: str
    participants: Optional[str] = None  # comma-separated
    object_key: Optional[str] = None  # MinIO key
    file_size: Optional[int] = None
    duration: Optional[int] = None  # seconds
    status: MeetingStatus = Field(default=MeetingStatus.pending)
    chunks_done: int = 0
    chunks_total: int = 0
    error: Optional[str] = None
    created_at: datetime = Field(default_factory=datetime.utcnow)
    # `onupdate` makes SQLAlchemy refresh the timestamp on every UPDATE it
    # emits for this row; without it the column kept its insert-time value
    # forever. Naive UTC is kept so the value matches `created_at` and the
    # existing column type.
    updated_at: datetime = Field(
        default_factory=datetime.utcnow,
        sa_column_kwargs={"onupdate": datetime.utcnow},
    )
class TranscriptSegment(SQLModel, table=True):
    """One timed piece of transcript text belonging to a meeting."""

    id: Optional[int] = Field(default=None, primary_key=True)
    # Indexed foreign key to the owning meeting.
    meeting_id: int = Field(foreign_key="meeting.id", index=True)
    start: float  # seconds
    end: float
    speaker: Optional[str] = None
    text: str
class Summary(SQLModel, table=True):
    """Summary of a meeting; `meeting_id` is unique, so at most one per meeting."""

    id: Optional[int] = Field(default=None, primary_key=True)
    meeting_id: int = Field(foreign_key="meeting.id", unique=True, index=True)
    # The list-valued fields below are stored in JSON columns.
    key_points: list = Field(default_factory=list, sa_column=Column(JSON))
    todos: list = Field(default_factory=list, sa_column=Column(JSON))
    decisions: list = Field(default_factory=list, sa_column=Column(JSON))
    keywords: list = Field(default_factory=list, sa_column=Column(JSON))
    preview: str = ""
    created_at: datetime = Field(default_factory=datetime.utcnow)

View File

114
api/app/routers/meetings.py Normal file
View File

@@ -0,0 +1,114 @@
import uuid
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException
from sqlalchemy.ext.asyncio import AsyncSession
from sqlmodel import select
from ..db import get_session
from ..models import Meeting, MeetingStatus, TranscriptSegment, Summary
from ..schemas import (
MeetingRead,
UploadCompleteRequest,
UploadUrlRequest,
UploadUrlResponse,
)
from ..services.storage import presign_put
from ..services.transcribe import process_meeting
# Every route in this module is served under the /api prefix.
router = APIRouter(prefix="/api", tags=["meetings"])
@router.post("/upload-url", response_model=UploadUrlResponse)
async def request_upload_url(
    body: UploadUrlRequest, session: AsyncSession = Depends(get_session)
):
    """Create a meeting in the ``uploading`` state and return a presigned PUT URL.

    Args:
        body: File name, content type and optional title / participants.
        session: Database session supplied by FastAPI.

    Returns:
        ``UploadUrlResponse`` with the new meeting id, the presigned upload
        URL and the object key the file will be stored under.
    """
    from pathlib import PurePosixPath

    # The file name is client-supplied. Keep only its final path component
    # so separators or ".." segments cannot shape the object key.
    safe_name = PurePosixPath(body.filename.replace("\\", "/")).name
    if safe_name in ("", ".", ".."):
        safe_name = "upload"
    object_key = f"audio/{uuid.uuid4().hex}-{safe_name}"

    # Sign before touching the database: if signing fails, no Meeting row is
    # left behind stuck in the "uploading" state.
    upload_url = presign_put(object_key, body.content_type)

    meeting = Meeting(
        title=body.title or body.filename,
        participants=body.participants,
        object_key=object_key,
        status=MeetingStatus.uploading,
    )
    session.add(meeting)
    await session.commit()
    await session.refresh(meeting)

    return UploadUrlResponse(
        meeting_id=meeting.id,
        upload_url=upload_url,
        object_key=object_key,
    )
@router.post("/upload-complete")
async def upload_complete(
    body: UploadCompleteRequest,
    background: BackgroundTasks,
    session: AsyncSession = Depends(get_session),
):
    """Mark a meeting as uploaded and queue background processing.

    Args:
        body: Meeting id and the uploaded file size reported by the client.
        background: FastAPI background-task queue.
        session: Database session supplied by FastAPI.

    Returns:
        ``{"ok": True, "meeting_id": <id>}``.

    Raises:
        HTTPException: 404 if the meeting does not exist; 409 if the meeting
            is not currently in the ``uploading`` state.
    """
    meeting = await session.get(Meeting, body.meeting_id)
    if not meeting:
        raise HTTPException(404, "meeting not found")

    # Only the uploading -> uploaded transition is valid here. Without this
    # guard a repeated call would queue a second pipeline for the same
    # meeting and could reset an already finished meeting to "uploaded".
    if meeting.status != MeetingStatus.uploading:
        raise HTTPException(409, "meeting is not awaiting upload completion")

    meeting.file_size = body.file_size
    meeting.status = MeetingStatus.uploaded
    await session.commit()

    background.add_task(process_meeting, meeting.id)
    return {"ok": True, "meeting_id": meeting.id}
@router.get("/meetings", response_model=list[MeetingRead])
async def list_meetings(session: AsyncSession = Depends(get_session)):
    """Return every meeting, newest first."""
    newest_first = select(Meeting).order_by(Meeting.created_at.desc())
    rows = await session.execute(newest_first)
    return rows.scalars().all()
@router.get("/meetings/{meeting_id}", response_model=MeetingRead)
async def get_meeting(meeting_id: int, session: AsyncSession = Depends(get_session)):
    """Return one meeting by primary key, or respond 404 if it does not exist."""
    found = await session.get(Meeting, meeting_id)
    if found is None:
        raise HTTPException(404, "meeting not found")
    return found
@router.get("/meetings/{meeting_id}/transcript")
async def get_transcript(meeting_id: int, session: AsyncSession = Depends(get_session)):
    """Return the meeting's transcript segments ordered by start time.

    The result is an empty list when no segments are stored for the id.
    """
    query = (
        select(TranscriptSegment)
        .where(TranscriptSegment.meeting_id == meeting_id)
        .order_by(TranscriptSegment.start)
    )
    rows = await session.execute(query)

    payload = []
    for segment in rows.scalars().all():
        payload.append(
            {
                "start": segment.start,
                "end": segment.end,
                "speaker": segment.speaker,
                "text": segment.text,
            }
        )
    return payload
@router.get("/meetings/{meeting_id}/summary")
async def get_summary(meeting_id: int, session: AsyncSession = Depends(get_session)):
    """Return the stored summary fields for a meeting, or ``None`` if absent."""
    query = select(Summary).where(Summary.meeting_id == meeting_id)
    summary = (await session.execute(query)).scalar_one_or_none()
    if summary is None:
        return None

    exposed = ("key_points", "todos", "decisions", "keywords", "preview")
    return {field: getattr(summary, field) for field in exposed}
@router.delete("/meetings/{meeting_id}")
async def delete_meeting(meeting_id: int, session: AsyncSession = Depends(get_session)):
    """Delete a meeting together with its transcript segments and summary.

    Args:
        meeting_id: Primary key of the meeting to remove.
        session: Database session supplied by FastAPI.

    Returns:
        ``{"ok": True}`` on success.

    Raises:
        HTTPException: 404 if the meeting does not exist.
    """
    from sqlalchemy import delete

    meeting = await session.get(Meeting, meeting_id)
    if not meeting:
        raise HTTPException(404, "meeting not found")

    # The child tables reference meeting.id with no cascade rule. Removing
    # only the parent fails with a foreign-key violation where the database
    # enforces the constraint, and leaves orphaned rows where it does not.
    # Delete the children first, in the same transaction.
    await session.execute(
        delete(TranscriptSegment).where(TranscriptSegment.meeting_id == meeting_id)
    )
    await session.execute(delete(Summary).where(Summary.meeting_id == meeting_id))
    await session.delete(meeting)
    await session.commit()
    # NOTE(review): the uploaded object under meeting.object_key is not
    # removed from object storage here - confirm whether it should be.
    return {"ok": True}

38
api/app/schemas.py Normal file
View File

@@ -0,0 +1,38 @@
from datetime import datetime
from typing import Optional
from pydantic import BaseModel
from .models import MeetingStatus
class UploadUrlRequest(BaseModel):
    # Request body of POST /api/upload-url.
    filename: str  # client-side file name; becomes part of the object key
    content_type: str = "audio/mp4"  # content type the presigned PUT is signed for
    title: Optional[str] = None  # the route falls back to `filename` when omitted
    participants: Optional[str] = None
class UploadUrlResponse(BaseModel):
    # Response body of POST /api/upload-url.
    meeting_id: int  # id of the Meeting row created for this upload
    upload_url: str  # presigned PUT URL
    object_key: str  # storage key the file is uploaded under
class UploadCompleteRequest(BaseModel):
    # Request body of POST /api/upload-complete.
    meeting_id: int
    file_size: int  # stored on the meeting as reported by the client
class MeetingRead(BaseModel):
    """Response schema for a meeting, built from a ``Meeting`` ORM object."""

    # Pydantic v2 configuration. The nested `class Config` form is deprecated
    # in v2; `model_config` is its replacement and accepts a plain dict.
    # from_attributes lets the model be populated from object attributes.
    model_config = {"from_attributes": True}

    id: int
    title: str
    participants: Optional[str] = None
    duration: Optional[int] = None
    file_size: Optional[int] = None
    status: MeetingStatus
    chunks_done: int
    chunks_total: int
    error: Optional[str] = None
    created_at: datetime

View File

View File

@@ -0,0 +1,60 @@
"""MinIO / S3 presigned URL helper.
We use boto3 because MinIO speaks the S3 protocol and boto3 is the de-facto
client. Presigned URLs let the browser upload large files directly to MinIO
without going through the FastAPI process, which avoids body-size limits and
streaming-into-RAM issues. Only single-PUT URLs are generated for now;
multipart upload is not implemented yet.
"""
import boto3
from botocore.client import Config
from ..config import settings
def s3_client(endpoint_url: "str | None" = None):
    """Build a boto3 S3 client configured for MinIO.

    Args:
        endpoint_url: Full endpoint URL to use. When omitted, the URL is
            built from ``settings.minio_secure`` and
            ``settings.minio_endpoint``.

    Returns:
        A boto3 S3 client using SigV4 signing and path-style addressing.
    """
    if endpoint_url is None:
        scheme = "https" if settings.minio_secure else "http"
        endpoint_url = f"{scheme}://{settings.minio_endpoint}"
    return boto3.client(
        "s3",
        endpoint_url=endpoint_url,
        aws_access_key_id=settings.minio_access_key,
        aws_secret_access_key=settings.minio_secret_key,
        region_name=settings.minio_region,
        config=Config(signature_version="s3v4", s3={"addressing_style": "path"}),
    )


def ensure_bucket() -> None:
    """Create the configured bucket if it does not exist yet.

    Raises:
        botocore.exceptions.ClientError: If ``head_bucket`` fails for any
            reason other than the bucket being absent.
    """
    from botocore.exceptions import ClientError

    client = s3_client()
    try:
        client.head_bucket(Bucket=settings.minio_bucket)
    except ClientError as exc:
        # Only "bucket not found" should lead to creation. Other failures
        # (bad credentials, access denied) are re-raised instead of being
        # hidden behind a create_bucket call.
        code = str(exc.response.get("Error", {}).get("Code", ""))
        if code not in ("404", "NoSuchBucket", "NotFound"):
            raise
        client.create_bucket(Bucket=settings.minio_bucket)


def _public_client():
    """Return a client bound to the endpoint that browsers can reach.

    A presigned URL embeds the endpoint host and the signature covers it, so
    URLs handed to the browser must be signed against the public endpoint.
    Falls back to the internal endpoint when no public one is configured.
    """
    return s3_client(settings.minio_public_endpoint or None)


def presign_put(object_key: str, content_type: str, expires: int = 3600) -> str:
    """Return a single-PUT presigned upload URL for ``object_key``.

    Multipart upload is not implemented; the MVP uses one PUT per file. No
    size limit is enforced by this function.

    Args:
        object_key: Key the object is stored under in the configured bucket.
        content_type: Content type included in the signed parameters; the
            uploader is expected to send the same ``Content-Type`` header.
        expires: URL lifetime in seconds.

    Returns:
        The presigned URL.
    """
    return _public_client().generate_presigned_url(
        "put_object",
        Params={
            "Bucket": settings.minio_bucket,
            "Key": object_key,
            "ContentType": content_type,
        },
        ExpiresIn=expires,
        HttpMethod="PUT",
    )


def presign_get(object_key: str, expires: int = 3600) -> str:
    """Return a presigned download URL for ``object_key``.

    Args:
        object_key: Key of the object in the configured bucket.
        expires: URL lifetime in seconds.

    Returns:
        The presigned URL.
    """
    return _public_client().generate_presigned_url(
        "get_object",
        Params={"Bucket": settings.minio_bucket, "Key": object_key},
        ExpiresIn=expires,
    )

View File

@@ -0,0 +1,46 @@
"""Stub for the transcribe + summarize pipeline.
This module is intentionally a placeholder for A3. The real implementation
lands in A5 and will:
1. Download the object from MinIO
2. Probe duration and size
3. If file > 24 MB, ffmpeg silencedetect → split into <20 MB chunks
4. Call Groq Whisper for each chunk, shift timestamps, merge
5. Call Poe Claude with map-reduce for long audio
6. Persist segments + summary, update meeting.status to done
"""
import asyncio
from sqlmodel import select
from ..db import AsyncSessionLocal
from ..models import Meeting, MeetingStatus
async def process_meeting(meeting_id: int) -> None:
    """Background task launched after upload-complete.

    For A3 this only walks the state machine (splitting, transcribing,
    summarizing, done) with short sleeps so the frontend can observe status
    transitions; A5 swaps in the real pipeline.

    Args:
        meeting_id: Primary key of the meeting to process. A missing meeting
            is ignored.
    """
    # (status to enter, seconds to wait after committing it)
    steps = [
        (MeetingStatus.splitting, 1),
        (MeetingStatus.transcribing, 2),
        (MeetingStatus.summarizing, 1),
        (MeetingStatus.done, 0),
    ]
    async with AsyncSessionLocal() as session:
        meeting = await session.get(Meeting, meeting_id)
        if not meeting:
            return
        try:
            for status, delay in steps:
                meeting.status = status
                if status == MeetingStatus.transcribing:
                    # The stub pretends the recording is a single chunk.
                    meeting.chunks_total = 1
                    meeting.chunks_done = 1
                await session.commit()
                await asyncio.sleep(delay)
        except Exception as exc:
            # If the failure came from a flush or commit, the session cannot
            # be used again until it is rolled back. Roll back first, then
            # reload the row before recording the failure.
            await session.rollback()
            meeting = await session.get(Meeting, meeting_id)
            if meeting is None:
                return
            meeting.status = MeetingStatus.failed
            # Some exceptions have an empty message; fall back to the type
            # name so the stored error is never blank.
            meeting.error = str(exc) or type(exc).__name__
            await session.commit()

12
api/requirements.txt Normal file
View File

@@ -0,0 +1,12 @@
fastapi==0.115.6
uvicorn[standard]==0.34.0
sqlmodel==0.0.22
aiosqlite==0.20.0
asyncpg==0.30.0
greenlet==3.1.1
python-multipart==0.0.20
python-dotenv==1.0.1
httpx==0.28.1
boto3==1.35.93
pydantic==2.10.4
pydantic-settings==2.7.0