auto-save 2026-04-01 09:03 (+8, ~2)
This commit is contained in:
43
android-ocr-service/app/build.gradle.kts
Normal file
43
android-ocr-service/app/build.gradle.kts
Normal file
@@ -0,0 +1,43 @@
|
||||
// Gradle plugins applied to this application module.
plugins {
    // Android application plugin.
    id("com.android.application")
    // Kotlin-for-Android plugin; kotlin("android") is shorthand for id("org.jetbrains.kotlin.android").
    kotlin("android")
}
|
||||
|
||||
// Android build configuration for the OCR service application.
android {
    namespace = "com.guiagent.ocr"
    compileSdk = 31

    // Java and Kotlin are both compiled for Java 8 bytecode.
    compileOptions {
        sourceCompatibility = JavaVersion.VERSION_1_8
        targetCompatibility = JavaVersion.VERSION_1_8
    }

    kotlinOptions {
        jvmTarget = "1.8"
    }

    defaultConfig {
        applicationId = "com.guiagent.ocr"
        minSdk = 26
        targetSdk = 31
        versionCode = 1
        versionName = "1.0"
    }

    buildTypes {
        // Release builds are not minified.
        getByName("release") {
            isMinifyEnabled = false
        }
    }
}
|
||||
|
||||
// Third-party libraries used by the app.
dependencies {
    // Embedded HTTP server that exposes the OCR endpoints.
    implementation("org.nanohttpd:nanohttpd:2.3.1")

    // JSON serialization of OCR responses.
    implementation("com.google.code.gson:gson:2.10.1")

    // ML Kit Chinese text recognition; per the original note this is the
    // bundled-model artifact, so no Google Mobile Services are needed.
    implementation("com.google.mlkit:text-recognition-chinese:16.0.0")
}
|
||||
28
android-ocr-service/app/src/main/AndroidManifest.xml
Normal file
28
android-ocr-service/app/src/main/AndroidManifest.xml
Normal file
@@ -0,0 +1,28 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
<!-- Manifest for the OCR service app: one launcher activity plus one foreground service. -->
<manifest xmlns:android="http://schemas.android.com/apk/res/android">

    <!-- Needed to read image files from shared storage for file-based OCR. -->
    <uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE"/>
    <!-- Needed by the embedded HTTP server to open its listening socket. -->
    <uses-permission android:name="android.permission.INTERNET"/>
    <!-- Needed to run OcrService as a foreground service. -->
    <uses-permission android:name="android.permission.FOREGROUND_SERVICE"/>

    <application
        android:allowBackup="false"
        android:label="OCR Service"
        android:supportsRtl="true">

        <!-- Launcher entry point; it starts OcrService when opened. -->
        <activity
            android:name=".MainActivity"
            android:exported="true">
            <intent-filter>
                <action android:name="android.intent.action.MAIN"/>
                <category android:name="android.intent.category.LAUNCHER"/>
            </intent-filter>
        </activity>

        <!-- Foreground service hosting the HTTP server.
             NOTE(review): exported="true" lets other apps start this service;
             confirm that is intended (e.g. for starting it from a shell). -->
        <service
            android:name=".OcrService"
            android:exported="true"
            android:foregroundServiceType="dataSync"/>

    </application>
</manifest>
|
||||
@@ -0,0 +1,23 @@
|
||||
package com.guiagent.ocr
|
||||
|
||||
import android.app.Activity
|
||||
import android.content.Intent
|
||||
import android.os.Bundle
|
||||
import android.widget.TextView
|
||||
|
||||
/**
 * Launcher screen for the app.
 *
 * Shows a plain text status label and asks the system to start [OcrService]
 * as a foreground service.
 */
class MainActivity : Activity() {

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)

        // The whole UI is a single programmatically built label.
        val statusLabel = TextView(this)
        statusLabel.text = "OCR Service\nPort: 18900\nStarting..."
        statusLabel.textSize = 20f
        statusLabel.setPadding(40, 40, 40, 40)
        setContentView(statusLabel)

        // Request the service start, then flip the label. The label changes as
        // soon as the request is issued; it does not wait for the service.
        startForegroundService(Intent(this, OcrService::class.java))
        statusLabel.text = "OCR Service\nPort: 18900\nRunning!"
    }
}
|
||||
@@ -0,0 +1,79 @@
|
||||
package com.guiagent.ocr
|
||||
|
||||
import android.graphics.Bitmap
|
||||
import android.graphics.BitmapFactory
|
||||
import com.google.mlkit.vision.common.InputImage
|
||||
import com.google.mlkit.vision.text.TextRecognition
|
||||
import com.google.mlkit.vision.text.chinese.ChineseTextRecognizerOptions
|
||||
import java.io.File
|
||||
import java.util.concurrent.CountDownLatch
|
||||
import java.util.concurrent.TimeUnit
|
||||
|
||||
/**
 * One recognized piece of text together with its bounding box.
 *
 * @property text the recognized string
 * @property x left coordinate of the bounding box
 * @property y top coordinate of the bounding box
 * @property w width of the bounding box
 * @property h height of the bounding box
 * @property confidence recognition confidence reported for this text
 */
data class TextBox(
    val text: String,
    val x: Int,
    val y: Int,
    val w: Int,
    val h: Int,
    val confidence: Float
) {
    /** Horizontal center of the box: [x] plus half of [w] (integer division). */
    val cx: Int
        get() = x + w / 2

    /** Vertical center of the box: [y] plus half of [h] (integer division). */
    val cy: Int
        get() = y + h / 2
}
|
||||
|
||||
/**
 * Process-wide entry point for text recognition backed by ML Kit's Chinese
 * text recognizer.
 *
 * Every function here blocks the calling thread until recognition finishes
 * or the timeout elapses, so callers should not invoke it from the main thread.
 */
object OcrEngine {

    private const val TAG = "OcrEngine"

    /** Maximum time, in seconds, to wait for a single recognition pass. */
    private const val RECOGNIZE_TIMEOUT_SECONDS = 10L

    /** Created on first use. */
    private val recognizer by lazy {
        TextRecognition.getClient(ChineseTextRecognizerOptions.Builder().build())
    }

    /**
     * Runs task callbacks inline on whichever thread completes the task.
     * Without an explicit executor the callbacks are delivered on the main
     * thread, which can never run them if the main thread is the one blocked
     * waiting for the result.
     */
    private val callbackExecutor = java.util.concurrent.Executor { task -> task.run() }

    /**
     * Recognizes text in the image file at [imagePath].
     *
     * @return the recognized lines, or an empty list when the file does not
     *   exist or cannot be decoded as an image.
     */
    fun recognize(imagePath: String): List<TextBox> {
        if (!File(imagePath).exists()) return emptyList()
        val bitmap = BitmapFactory.decodeFile(imagePath) ?: return emptyList()
        return recognizeBitmap(bitmap)
    }

    /**
     * Captures the screen with `screencap -p` and recognizes it directly,
     * without writing the screenshot to disk.
     *
     * @return the recognized lines, or an empty list when the command yields
     *   no output or the output cannot be decoded as an image.
     */
    fun screencapAndRecognize(): List<TextBox> {
        val process = Runtime.getRuntime().exec("screencap -p")
        val bytes = try {
            // Close the pipe once drained so the file descriptor is not leaked.
            process.inputStream.use { it.readBytes() }.also { process.waitFor() }
        } finally {
            process.destroy()
        }
        if (bytes.isEmpty()) return emptyList()
        val bitmap = BitmapFactory.decodeByteArray(bytes, 0, bytes.size) ?: return emptyList()
        return recognizeBitmap(bitmap)
    }

    /**
     * Recognizes text in [bitmap], returning one [TextBox] per recognized line
     * that has a bounding box.
     *
     * The bitmap is recycled once recognition has completed (successfully or
     * not); callers must not use it afterwards. If recognition does not finish
     * within the timeout the bitmap is left untouched, because the recognizer
     * may still be reading it, and an empty list is returned.
     *
     * @return the recognized lines; empty on failure or timeout.
     */
    fun recognizeBitmap(bitmap: Bitmap): List<TextBox> {
        val image = InputImage.fromBitmap(bitmap, 0)
        val done = CountDownLatch(1)
        // The callback publishes a finished, immutable list here, so the waiting
        // thread never observes a list that is still being filled.
        val recognized = java.util.concurrent.atomic.AtomicReference<List<TextBox>>(emptyList())

        recognizer.process(image)
            .addOnSuccessListener(callbackExecutor) { visionText ->
                try {
                    recognized.set(
                        visionText.textBlocks
                            .flatMap { block -> block.lines }
                            .mapNotNull { line ->
                                val box = line.boundingBox ?: return@mapNotNull null
                                TextBox(
                                    text = line.text,
                                    x = box.left,
                                    y = box.top,
                                    w = box.width(),
                                    h = box.height(),
                                    confidence = line.confidence ?: 0.8f
                                )
                            }
                    )
                } finally {
                    // Always release the waiter, even if mapping throws.
                    done.countDown()
                }
            }
            .addOnFailureListener(callbackExecutor) { error ->
                android.util.Log.w(TAG, "Text recognition failed", error)
                done.countDown()
            }

        val finished = done.await(RECOGNIZE_TIMEOUT_SECONDS, TimeUnit.SECONDS)
        if (!finished) {
            android.util.Log.w(TAG, "Text recognition timed out after ${RECOGNIZE_TIMEOUT_SECONDS}s")
            return emptyList()
        }

        bitmap.recycle()
        return recognized.get()
    }
}
|
||||
@@ -0,0 +1,88 @@
|
||||
package com.guiagent.ocr
|
||||
|
||||
import android.graphics.BitmapFactory
|
||||
import com.google.gson.Gson
|
||||
import fi.iki.elonen.NanoHTTPD
|
||||
import java.io.ByteArrayOutputStream
|
||||
|
||||
/**
 * Embedded HTTP server exposing OCR over three endpoints:
 *
 * - `/ocr`    — OCR an image file named by the `path` query parameter
 *               (defaults to `/sdcard/ocr_screen.png`).
 * - `/snap`   — POST: OCR the image bytes sent as the request body;
 *               any other method falls back to `/ocr` behavior.
 * - `/health` — static status document.
 *
 * On `/ocr` and `/snap`, the optional `text` query parameter restricts the
 * results to lines containing that substring.
 *
 * @param port TCP port to listen on.
 */
class OcrHttpServer(port: Int = 18900) : NanoHTTPD(port) {

    private val gson = Gson()
    private val defaultPath = "/sdcard/ocr_screen.png"

    override fun serve(session: IHTTPSession): Response {
        return when (session.uri) {
            "/ocr" -> handleOcr(session)
            "/snap" -> handleSnap(session)
            "/health" -> jsonResponse(mapOf("status" to "ok", "engine" to "mlkit-chinese"))
            else -> newFixedLengthResponse(Response.Status.NOT_FOUND, MIME_PLAINTEXT, "404")
        }
    }

    /**
     * File-based OCR: recognizes the image at the `path` query parameter.
     *
     * NOTE(review): `path` comes straight from the request, so a client can
     * point it at any file this app can read; confirm this is acceptable for
     * the networks the server is reachable from.
     */
    private fun handleOcr(session: IHTTPSession): Response {
        val params = session.parms ?: emptyMap()
        val imagePath = params["path"] ?: defaultPath
        return doOcr(params["text"]) { OcrEngine.recognize(imagePath) }
    }

    /**
     * OCR of image bytes POSTed as the raw request body, without storing a file.
     *
     * The body is read directly from the request stream using Content-Length.
     * NOTE(review): this deliberately avoids `parseBody()`: in NanoHTTPD 2.3.x
     * a raw POST body appears to be decoded into a String under "postData"
     * rather than spooled to a temp file, which would corrupt binary image
     * data — verify against the library source for the pinned version.
     */
    private fun handleSnap(session: IHTTPSession): Response {
        // Non-POST requests fall back to file-based OCR.
        if (session.method != Method.POST) return handleOcr(session)

        val params = session.parms ?: emptyMap()

        val declaredLength = session.headers["content-length"]?.trim()?.toLongOrNull() ?: 0L
        if (declaredLength <= 0L) {
            return jsonResponse(mapOf("error" to "no body received", "count" to 0))
        }
        if (declaredLength > MAX_BODY_BYTES) {
            // Refuse to allocate a buffer of client-chosen, unbounded size.
            return jsonResponse(mapOf("error" to "body too large", "size" to declaredLength, "count" to 0))
        }

        val imageBytes = ByteArray(declaredLength.toInt())
        try {
            java.io.DataInputStream(session.inputStream).readFully(imageBytes)
        } catch (e: java.io.IOException) {
            // Client disconnected or sent fewer bytes than it declared.
            return jsonResponse(mapOf("error" to "incomplete body", "size" to declaredLength, "count" to 0))
        }

        val bitmap = BitmapFactory.decodeByteArray(imageBytes, 0, imageBytes.size)
            ?: return jsonResponse(mapOf("error" to "decode failed", "size" to imageBytes.size, "count" to 0))

        return doOcr(params["text"]) { OcrEngine.recognizeBitmap(bitmap) }
    }

    /**
     * Runs [recognize], optionally keeps only lines whose text contains
     * [query], and wraps the outcome in a JSON response.
     *
     * `elapsed_ms` covers both recognition and filtering.
     */
    private fun doOcr(query: String?, recognize: () -> List<TextBox>): Response {
        val startTime = System.currentTimeMillis()

        val recognized = recognize()
        val matches = if (query.isNullOrBlank()) {
            recognized
        } else {
            recognized.filter { it.text.contains(query) }
        }

        val elapsed = System.currentTimeMillis() - startTime

        val payload = mapOf(
            "results" to matches.map { box ->
                mapOf(
                    "text" to box.text,
                    "x" to box.x,
                    "y" to box.y,
                    "w" to box.w,
                    "h" to box.h,
                    "cx" to box.cx,
                    "cy" to box.cy,
                    "confidence" to box.confidence
                )
            },
            "count" to matches.size,
            "elapsed_ms" to elapsed
        )
        return jsonResponse(payload)
    }

    /** Serializes [data] with Gson and returns it as a 200 `application/json` response. */
    private fun jsonResponse(data: Any): Response {
        val json = gson.toJson(data)
        return newFixedLengthResponse(Response.Status.OK, "application/json", json)
    }

    private companion object {
        /** Largest POST body accepted by `/snap` (64 MiB). */
        const val MAX_BODY_BYTES = 64L * 1024 * 1024
    }
}
|
||||
@@ -0,0 +1,49 @@
|
||||
package com.guiagent.ocr
|
||||
|
||||
import android.app.*
|
||||
import android.content.Intent
|
||||
import android.os.Build
|
||||
import android.os.IBinder
|
||||
import android.util.Log
|
||||
|
||||
/**
 * Foreground service that owns the [OcrHttpServer] instance.
 *
 * The server is started on the first start command and stopped when the
 * service is destroyed. The service does not support binding.
 */
class OcrService : Service() {

    private var server: OcrHttpServer? = null

    /**
     * Promotes the service to the foreground and starts the HTTP server if it
     * is not already running.
     *
     * @return [START_STICKY] when the server is running; [START_NOT_STICKY]
     *   when the server could not be started, so the system does not keep
     *   restarting a service that cannot bind its port.
     */
    override fun onStartCommand(intent: Intent?, flags: Int, startId: Int): Int {
        startForegroundNotification()

        if (server == null) {
            val candidate = OcrHttpServer(PORT)
            try {
                candidate.start()
            } catch (e: java.io.IOException) {
                // Typically the port is already in use; give up cleanly instead of crashing.
                Log.e(TAG, "Failed to start OCR HTTP server on port $PORT", e)
                stopSelf()
                return START_NOT_STICKY
            }
            server = candidate
            Log.i(TAG, "OCR HTTP server started on port $PORT")
        }
        return START_STICKY
    }

    override fun onDestroy() {
        server?.stop()
        server = null
        Log.i(TAG, "OCR HTTP server stopped")
        super.onDestroy()
    }

    /** Binding is not supported. */
    override fun onBind(intent: Intent?): IBinder? = null

    /** Creates the notification channel (where required) and enters the foreground state. */
    private fun startForegroundNotification() {
        val channelId = "ocr_service"
        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
            val channel = NotificationChannel(channelId, "OCR Service", NotificationManager.IMPORTANCE_LOW)
            getSystemService(NotificationManager::class.java).createNotificationChannel(channel)
        }
        val notification = Notification.Builder(this, channelId)
            .setContentTitle("OCR Service")
            .setContentText("Running on port $PORT")
            .setSmallIcon(android.R.drawable.ic_menu_camera)
            .build()
        startForeground(1, notification)
    }

    private companion object {
        const val TAG = "OcrService"
        const val PORT = 18900
    }
}
|
||||
4
android-ocr-service/app/src/main/res/values/strings.xml
Normal file
4
android-ocr-service/app/src/main/res/values/strings.xml
Normal file
@@ -0,0 +1,4 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
<!-- String resources for the OCR service app. -->
<resources>
    <!-- Application display name. -->
    <string name="app_name">OCR Service</string>
</resources>
|
||||
Reference in New Issue
Block a user