auto-save 2026-04-01 09:03 (+8, ~2)
This commit is contained in:
38
scripts/test_device.py
Normal file
38
scripts/test_device.py
Normal file
@@ -0,0 +1,38 @@
|
||||
"""Quick test: check ADB device connection and take a screenshot."""
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from src.capture import ADBCapture
|
||||
|
||||
|
||||
def main():
    """Check the ADB device connection and capture one screenshot.

    Prints the details returned by ``ADBCapture.check_device()``. When the
    device is reported as not connected, prints the error together with
    troubleshooting hints and exits with status 1. Otherwise takes a
    screenshot with ``save=True`` and reports its size and the capture's
    screenshot directory.
    """
    capture = ADBCapture()

    print("Checking device...")
    status = capture.check_device()

    if not status["connected"]:
        failure_lines = (
            f"[FAIL] {status['error']}",
            "",
            "Troubleshooting:",
            " 1. USB debugging enabled on phone?",
            " 2. Run: adb devices",
            " 3. Accept USB debugging prompt on phone",
        )
        for line in failure_lines:
            print(line)
        sys.exit(1)

    # (prefix, key) pairs are printed one at a time so each key is only
    # looked up right before its own line is emitted.
    summary_fields = (
        ("[OK] Device: ", "model"),
        (" Serial: ", "serial"),
        (" Resolution: ", "resolution"),
        (" All devices: ", "all_devices"),
    )
    for prefix, key in summary_fields:
        print(f"{prefix}{status[key]}")

    print("\nTaking screenshot...")
    shot = capture.screenshot(save=True)
    shot_width, shot_height = shot.size[0], shot.size[1]
    print(f"[OK] Screenshot: {shot_width}x{shot_height}")
    print(f" Saved to: {capture.screenshot_dir}/")
|
||||
|
||||
|
||||
# Script entry point: run the device check only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()
|
||||
149
scripts/test_ocr_grounding.py
Normal file
149
scripts/test_ocr_grounding.py
Normal file
@@ -0,0 +1,149 @@
|
||||
"""Test OCR grounding: take a screenshot and find text elements.
|
||||
|
||||
Usage:
|
||||
# Find a specific text on current screen
|
||||
python scripts/test_ocr_grounding.py "微信"
|
||||
|
||||
# Detect ALL text on screen (debug mode)
|
||||
python scripts/test_ocr_grounding.py --all
|
||||
|
||||
# Use a saved screenshot instead of live ADB capture
|
||||
python scripts/test_ocr_grounding.py "发送" --image data/screenshots/test.png
|
||||
|
||||
# Try different engines
|
||||
python scripts/test_ocr_grounding.py "微信" --engine easyocr
|
||||
python scripts/test_ocr_grounding.py "微信" --engine pytesseract
|
||||
|
||||
# Also try uiautomator dump (hybrid mode)
|
||||
python scripts/test_ocr_grounding.py "微信" --hybrid
|
||||
|
||||
# Save annotated screenshot with bounding boxes drawn
|
||||
python scripts/test_ocr_grounding.py --all --annotate
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import argparse
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
from src.grounding.ocr_grounding import OCRGrounding
|
||||
|
||||
|
||||
def annotate_image(img: Image.Image, boxes, query: str = "") -> Image.Image:
    """Return a copy of ``img`` with every box outlined and labelled.

    Args:
        img: Source image. It is copied; the original is never drawn on.
        boxes: Iterable of box objects exposing ``x``, ``y``, ``w``, ``h``,
            ``text``, ``confidence`` and ``contains_text(query)``.
        query: Optional search text. Boxes for which ``contains_text(query)``
            is truthy are drawn in red with a 3px outline; every other box
            is drawn in lime with a 1px outline. An empty query highlights
            nothing.

    Returns:
        The annotated copy of ``img``.
    """
    annotated = img.copy()
    draw = ImageDraw.Draw(annotated)

    for box in boxes:
        is_match = bool(query) and box.contains_text(query)
        color = "red" if is_match else "lime"
        width = 3 if is_match else 1

        draw.rectangle(
            [box.x, box.y, box.x + box.w, box.y + box.h],
            outline=color,
            width=width,
        )

        label = f"{box.text} ({box.confidence:.0%})"
        # Labels sit 14px above the box; clamp so boxes touching the top
        # edge do not get their label drawn off-canvas.
        label_pos = (box.x, max(0, box.y - 14))
        try:
            draw.text(label_pos, label, fill=color)
        except UnicodeEncodeError:
            # Pillow's bitmap default font only covers Latin-1 and raises on
            # other text (e.g. CJK). Degrade the label rather than abort the
            # whole annotation pass.
            fallback = label.encode("latin-1", "replace").decode("latin-1")
            draw.text(label_pos, fallback, fill=color)

    return annotated
|
||||
|
||||
|
||||
def _save_annotated(img, boxes, query, out_path):
    """Draw ``boxes`` on a copy of ``img`` and write the result to ``out_path``."""
    out_dir = os.path.dirname(out_path)
    if out_dir:
        # The target directory may not exist yet (e.g. running with --image
        # on a fresh checkout); saving would otherwise fail.
        os.makedirs(out_dir, exist_ok=True)
    annotate_image(img, boxes, query=query).save(out_path)
    print(f"\nAnnotated image saved: {out_path}")


def main():
    """Command-line entry point for exercising OCR grounding.

    Parses the arguments, obtains an image (from ``--image`` or a live ADB
    capture), then optionally lists all detected text (``--all``) and/or
    searches for ``query``. With ``--annotate`` an image with bounding boxes
    is written under ``data/screenshots/``.

    Exits with status 1 when the image cannot be loaded, the device is not
    connected, or the query is not found. Exits via ``parser.error`` when
    neither a query nor ``--all`` is given.
    """
    parser = argparse.ArgumentParser(description="Test OCR grounding on phone screen")
    parser.add_argument("query", nargs="?", default=None, help="Text to find on screen")
    parser.add_argument("--all", action="store_true", help="Detect all text on screen")
    parser.add_argument("--image", type=str, help="Use saved screenshot instead of ADB")
    parser.add_argument("--engine", type=str, default="auto",
                        choices=["auto", "pytesseract", "easyocr"],
                        help="OCR engine to use")
    parser.add_argument("--hybrid", action="store_true",
                        help="Try uiautomator + OCR hybrid approach")
    parser.add_argument("--annotate", action="store_true",
                        help="Save annotated screenshot with bounding boxes")
    args = parser.parse_args()

    if not args.query and not args.all:
        parser.error("Provide a search query or --all")

    # Get screenshot
    if args.image:
        print(f"Loading image: {args.image}")
        try:
            img = Image.open(args.image)
        except OSError as exc:
            # Covers a missing/unreadable file as well as a file Pillow
            # cannot identify; report it like the device failure below.
            print(f"[FAIL] Cannot load image: {exc}")
            sys.exit(1)
    else:
        from src.capture import ADBCapture

        cap = ADBCapture()
        info = cap.check_device()
        if not info["connected"]:
            print(f"[FAIL] {info['error']}")
            sys.exit(1)
        print(f"Device: {info['model']} ({info['resolution']})")
        print("Taking screenshot...")
        img = cap.screenshot(save=True)

    print(f"Image size: {img.width}x{img.height}")
    grounding = OCRGrounding(engine=args.engine)

    # Full-detection result, kept so the query branch can reuse it.
    boxes = None

    if args.all:
        print(f"\n--- Detecting ALL text (engine={args.engine}) ---\n")
        boxes = grounding.detect_all(img)
        if not boxes:
            print("[WARN] No text detected!")
        else:
            print(f"Found {len(boxes)} text regions:\n")
            for i, box in enumerate(boxes, 1):
                nx, ny = box.center_normalized(img.width, img.height)
                print(f" {i:3d}. '{box.text}'")
                print(f" pixel=({box.cx}, {box.cy}) "
                      f"norm=({nx:.3f}, {ny:.3f}) "
                      f"conf={box.confidence:.0%}")

        if args.annotate and boxes:
            _save_annotated(img, boxes, args.query or "",
                            "data/screenshots/annotated_all.png")

    if args.query:
        print(f"\n--- Searching for: '{args.query}' (engine={args.engine}) ---\n")

        if args.hybrid:
            result = grounding.find_text_hybrid(img, args.query)
        else:
            result = grounding.find_text(img, args.query)

        if result is None:
            print(f"[NOT FOUND] '{args.query}' was not found on screen.")
            print("\nTip: Run with --all to see all detected text.")
            sys.exit(1)

        nx, ny = result.center_normalized(img.width, img.height)
        print(f"[FOUND] '{result.text}'")
        print(f" Pixel center: ({result.cx}, {result.cy})")
        print(f" Normalized center: ({nx:.4f}, {ny:.4f})")
        print(f" Bounding box: x={result.x} y={result.y} "
              f"w={result.w} h={result.h}")
        print(f" Confidence: {result.confidence:.0%}")
        print()
        print(" To tap this element:")
        print(f" adb shell input tap {result.cx} {result.cy}")

        # Show all matches
        all_matches = grounding.find_all_matches(img, args.query)
        if len(all_matches) > 1:
            print(f"\n ({len(all_matches)} total matches found)")
            for i, m in enumerate(all_matches, 1):
                print(f" {i}. '{m.text}' at ({m.cx},{m.cy}) conf={m.confidence:.0%}")

        if args.annotate:
            if boxes is None:
                # --all did not run, so nothing has been detected yet.
                boxes = grounding.detect_all(img)
            # Otherwise reuse the --all result instead of running OCR a
            # second time on the same image.
            _save_annotated(img, boxes, args.query,
                            "data/screenshots/annotated_search.png")
|
||||
|
||||
|
||||
# Script entry point: run the OCR grounding CLI only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user