Files
gui-agent/src/main.py

123 lines
3.2 KiB
Python

"""Phone GUI Agent - Main Entry Point
Web console for controlling the agent loop.
"""
import asyncio
import json
from pathlib import Path
from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Request
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from config import settings
from src.capture import ADBCapture
from src.planner.agent_loop import AgentLoop
# ASGI application object. main() hands it to uvicorn through the
# "src.main:app" import string.
app = FastAPI(title="Phone GUI Agent", version="0.1.0")

# Directory one level above the directory that contains this file; the web
# assets below are resolved relative to it.
BASE_DIR = Path(__file__).parent.parent

# Files under <BASE_DIR>/web/static are served at the /static URL prefix.
app.mount("/static", StaticFiles(directory=BASE_DIR / "web" / "static"), name="static")

# Jinja2 templates are loaded from <BASE_DIR>/web/templates.
templates = Jinja2Templates(directory=BASE_DIR / "web" / "templates")

# Global state
# Both objects are created once, when this module is imported, and are shared
# by every HTTP handler and WebSocket connection defined in this module.
capture = ADBCapture()
agent = AgentLoop()
# Serve the web console page by rendering the "index.html" template.
# Documented with comments instead of a docstring: FastAPI publishes a
# handler's docstring as the route description in the generated OpenAPI
# schema, and this route has never had one.
@app.get("/", response_class=HTMLResponse)
async def index(request: Request):
    console_page = templates.TemplateResponse(request, "index.html")
    return console_page
@app.get("/api/device")
async def device_info():
    """Check device connection status."""
    # On success the result of capture.check_device() is returned unchanged.
    # Any failure is reported in the response body as
    # {"connected": False, "error": "<message>"} instead of propagating.
    try:
        status = capture.check_device()
    except Exception as exc:
        return {"connected": False, "error": str(exc)}
    return status
@app.get("/api/screenshot")
async def take_screenshot():
    """Take a screenshot and return base64."""
    # Success: {"ok": True, "image": <value from capture.screenshot_base64()>}.
    # Failure: {"ok": False, "error": "<message>"}; the exception is not
    # propagated to the framework.
    try:
        encoded_image = capture.screenshot_base64()
    except Exception as exc:
        return {"ok": False, "error": str(exc)}
    return {"ok": True, "image": encoded_image}
@app.post("/api/stop")
async def stop_task():
    """Stop the current running task."""
    # agent.stop() is called on the shared module-level agent; whatever it
    # returns is ignored and the caller always gets the same acknowledgement.
    agent.stop()
    acknowledgement = {"ok": True}
    return acknowledgement
@app.websocket("/ws/task")
async def task_websocket(ws: WebSocket):
    """Run one agent task over a WebSocket and stream its progress.

    Protocol (one task per connection):

    * Client sends ``{"task": "Open WeChat and search for Zhang San"}``.
    * Server replies ``{"status": "started", "task": ...}``, then one
      ``{"status": "step", ...}`` message per agent step, then a final
      message with the session status and the total number of steps.
    * Problems are reported as ``{"error": "<message>"}``.

    If the client disconnects, the shared agent is asked to stop.
    """
    await ws.accept()
    try:
        data = await ws.receive_json()
        # Any JSON value can arrive here (list, string, number, ...); only an
        # object can carry a task, so everything else counts as "no task".
        task = data.get("task", "") if isinstance(data, dict) else ""
        if not task:
            await ws.send_json({"error": "No task provided"})
            return
        await ws.send_json({"status": "started", "task": task})

        # Capture the loop here, where it is guaranteed to be the running one.
        # Looking it up inside on_step is unreliable: whether the agent calls
        # the callback on the loop thread or on a worker thread is not visible
        # from this module, and asyncio.get_event_loop() does not return this
        # loop when called from another thread.
        loop = asyncio.get_running_loop()
        # One future per scheduled step message, so they can be flushed (and
        # their failures observed) before the final status message is sent.
        pending_sends = []

        def on_step(result):
            """Schedule a progress message for one agent step.

            Safe to call from the event-loop thread or from any other thread.
            """
            screenshot = result.screenshot_before
            payload = {
                "status": "step",
                "step": result.step,
                "observation": result.observation,
                "thinking": result.thinking,
                "action_type": result.action_type,
                "action_desc": result.action_desc,
                # Only a 100-character preview of the screenshot is sent.
                "screenshot": (screenshot[:100] + "...") if screenshot else None,
                "error": result.error,
            }
            pending_sends.append(
                asyncio.run_coroutine_threadsafe(ws.send_json(payload), loop)
            )

        session = await agent.run_task(task, on_step=on_step)

        # Wait for every step message to go out first; otherwise the final
        # status could overtake step messages that are scheduled but not yet
        # sent. A failed send is re-raised here and handled below.
        for scheduled in pending_sends:
            await asyncio.wrap_future(scheduled)

        await ws.send_json({
            "status": session.status,
            "total_steps": len(session.steps),
            "task": task,
        })
    except WebSocketDisconnect:
        agent.stop()
    except Exception as exc:
        try:
            await ws.send_json({"error": str(exc)})
        except Exception:
            # Best effort only: the socket may already be closed, in which
            # case there is nobody left to report the error to.
            pass
def main():
    """Start the web console under uvicorn.

    Host and port are read from ``settings``; auto-reload is always enabled.
    """
    # Imported inside the function, so merely importing this module does not
    # import uvicorn.
    import uvicorn

    server_options = {
        "host": settings.host,
        "port": settings.port,
        "reload": True,
    }
    # The app is passed as an import string rather than as an object.
    uvicorn.run("src.main:app", **server_options)


# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    main()