当前位置: 首页 > news >正文

安卓OCR使用(Google ML Kit)

OCR是一个很常用的功能,Google ML Kit提供了OCR能力,用起来也很简单,本文介绍一下使用方法。

1. 相关概念

名词概念解释
TextBlock一个段落
Line一行文本
Element元素单词;对汉字来说,类似"开头 (分隔符)中间(分隔符) 结尾"这样含有明显分隔符的才会有多个字在一个Element中,否则就是单个字
Symbol字符字母;对汉字来说就是单个字

2. 代码实现

在build.gradle中添加相关依赖:

// To recognize Chinese script
implementation 'com.google.mlkit:text-recognition-chinese:16.0.1'

添加布局文件activity_ocr.xml:

<?xml version="1.0" encoding="utf-8"?>
<LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"android:layout_width="match_parent"android:layout_height="match_parent"android:orientation="vertical"><FrameLayoutandroid:layout_width="wrap_content"android:layout_height="wrap_content"><SurfaceViewandroid:id="@+id/camera_preview"android:layout_width="wrap_content"android:layout_height="wrap_content" /><com.example.study.views.DrawViewandroid:id="@+id/ocr_area"android:layout_width="wrap_content"android:layout_height="wrap_content" /></FrameLayout><Buttonandroid:id="@+id/ocr_switch"android:layout_width="match_parent"android:layout_height="match_parent"android:layout_gravity="center_horizontal|bottom"android:layout_marginBottom="80dp"android:background="@color/夏云灰"android:text="stop" />
</LinearLayout>

绘制文字的OCRDrawView.java:

package com.example.study.views;import android.content.Context;
import android.graphics.Canvas;
import android.graphics.Color;
import android.graphics.Paint;
import android.graphics.Path;
import android.graphics.Point;
import android.util.AttributeSet;
import android.view.View;import androidx.annotation.Nullable;import java.util.ArrayList;
import java.util.List;public class OCRDrawView extends View {private final Object lock = new Object();protected Paint paint = new Paint();protected Path path = new Path();private final List<ShapeInfo> cornerPointsList = new ArrayList<>();public OCRDrawView(Context context) {super(context);}public OCRDrawView(Context context, @Nullable AttributeSet attrs) {super(context, attrs);}public void clear() {synchronized (lock) {cornerPointsList.clear();}postInvalidate();}public void add(Point[] cornerPoints, String text) {synchronized (lock) {cornerPointsList.add(new ShapeInfo(cornerPoints, text));}}@Overrideprotected void onDraw(Canvas canvas) {super.onDraw(canvas);synchronized (lock) {for (ShapeInfo shapeInfo : cornerPointsList) {drawBackground(shapeInfo, canvas);drawText(shapeInfo, canvas);}}}private void drawText(ShapeInfo shapeInfo, Canvas canvas) {Point[] points = shapeInfo.points;// 根据矩形区域的高度设置文字大小double height = calDistance(points[0], points[3]);double width = calDistance(points[2], points[3]);float textSize = (float) Math.min(height, width / shapeInfo.text.length());paint.setColor(Color.BLUE);paint.setTextSize(textSize);path.reset();path.moveTo(points[3].x, points[3].y);path.lineTo(points[2].x, points[2].y);canvas.drawTextOnPath(shapeInfo.text, path, 0, 0, paint);}private double calDistance(Point start, Point end) {return Math.sqrt(Math.pow(start.x - end.x, 2) + Math.pow(start.y - end.y, 2));}private void drawBackground(ShapeInfo shapeInfo, Canvas canvas) {Point[] shape = shapeInfo.points;path.reset();path.moveTo(shape[3].x, shape[3].y);for (int i = 0; i < shape.length; i++) {path.lineTo(shape[i].x, shape[i].y);}path.close();paint.setColor(Color.WHITE);canvas.drawPath(path, paint);}static class ShapeInfo {Point[] points;String text;public ShapeInfo(Point[] shape, String text) {this.points = shape;this.text = text;}}
}

activity类:

package com.example.study.activities;import android.Manifest;
import android.content.pm.PackageManager;
import android.graphics.Bitmap;
import android.graphics.BitmapFactory;
import android.graphics.ImageFormat;
import android.graphics.Matrix;
import android.graphics.Point;
import android.graphics.Rect;
import android.graphics.YuvImage;
import android.hardware.Camera;
import android.os.Bundle;
import android.util.Log;
import android.view.SurfaceHolder;
import android.view.SurfaceView;
import android.view.ViewGroup;
import android.widget.Button;
import android.widget.FrameLayout;
import android.widget.Toast;import androidx.activity.ComponentActivity;
import androidx.annotation.NonNull;
import androidx.annotation.Nullable;import com.example.study.R;
import com.example.study.views.OCRDrawView;
import com.google.mlkit.vision.text.Text;
import com.google.mlkit.vision.text.TextRecognition;
import com.google.mlkit.vision.text.TextRecognizer;
import com.google.mlkit.vision.text.chinese.ChineseTextRecognizerOptions;import java.io.ByteArrayOutputStream;public class OCRActivity extends ComponentActivity implements Camera.PreviewCallback, SurfaceHolder.Callback {private static final String TAG = "CameraDemoActivity";private static final int REQUEST_CAMERA = 1000;private static final int HEIGHT = 1920;private static final int WIDTH = 1080;private static final int ORIENTATION = 90;private SurfaceView preview;private OCRDrawView ocrArea;private Button ocrSwitch;private Camera camera;private Camera.Parameters parameters;private TextRecognizer recognizer;private Matrix matrix;private boolean isRecognizering = false;private boolean stopRecognizer = false;@Overrideprotected void onCreate(@Nullable Bundle savedInstanceState) {super.onCreate(savedInstanceState);this.setContentView(R.layout.activity_ocr);initView();initVar();// 检查权限if (checkSelfPermission(Manifest.permission.CAMERA) != PackageManager.PERMISSION_GRANTED) {requestPermissions(new String[]{Manifest.permission.CAMERA}, REQUEST_CAMERA);} else {preview.getHolder().addCallback(this);}}private void initVar() {recognizer = TextRecognition.getClient(new ChineseTextRecognizerOptions.Builder().build());matrix = new Matrix();matrix.setRotate(ORIENTATION);// 4个角的坐标是没有旋转过的,所以HEIGHT、WIDTH是反的matrix.preTranslate(-HEIGHT >> 1, -WIDTH >> 1);}private void initView() {preview = findViewById(R.id.camera_preview);ocrArea = findViewById(R.id.ocr_area);ocrSwitch = findViewById(R.id.ocr_switch);ocrSwitch.setOnClickListener(view -> {stopRecognizer = !stopRecognizer;ocrSwitch.setText(stopRecognizer ? "start" : "stop");if (camera == null) {return;}if (stopRecognizer) {camera.stopPreview();} else {camera.startPreview();}});adjustSurface(preview, ocrArea);}private void adjustSurface(SurfaceView cameraPreview, OCRDrawView ocrArea) {FrameLayout.LayoutParams cameraPreviewParams = (FrameLayout.LayoutParams) cameraPreview.getLayoutParams();cameraPreviewParams.width = WIDTH;cameraPreviewParams.height = HEIGHT;ViewGroup.LayoutParams ocrAreaParams = ocrArea.getLayoutParams();ocrAreaParams.width = WIDTH;ocrAreaParams.height = HEIGHT;}@Overridepublic void onRequestPermissionsResult(int requestCode, @NonNull String[] permissions, @NonNull int[] grantResults) {super.onRequestPermissionsResult(requestCode, permissions, grantResults);if (requestCode == REQUEST_CAMERA && grantResults.length > 0) {if (grantResults[0] == PackageManager.PERMISSION_GRANTED) {preview.getHolder().addCallback(this);surfaceCreated(preview.getHolder());camera.setPreviewCallback(this);camera.startPreview();} else {finish();}}}@Overridepublic void onPreviewFrame(byte[] data, Camera camera) {if (isRecognizering || stopRecognizer) {return;}Bitmap bitmap = convertToBitmap(camera, data);isRecognizering = true;recognizer.process(bitmap, ORIENTATION).addOnSuccessListener(text -> {parseOCRResult(text);}).addOnFailureListener(exception -> {Toast.makeText(this, "Failure", Toast.LENGTH_SHORT).show();isRecognizering = false;}).addOnCompleteListener(task -> {isRecognizering = false;}).addOnCanceledListener(() -> {Toast.makeText(this, "Canceled", Toast.LENGTH_SHORT).show();isRecognizering = false;});}private void parseOCRResult(Text text) {// 所有识别到的内容,下同String textContent = text.getText();if (textContent == null || textContent.trim().length() == 0) {return;}ocrArea.clear();// 块,段落for (Text.TextBlock textBlock : text.getTextBlocks()) {// 一行文本for (Text.Line line : textBlock.getLines()) {drawResult(line);// 元素:单词,对汉字来说,需要"开头 (分隔符)中间(分隔符) 结尾"之类比较强烈的分隔符去分隔for (Text.Element element : line.getElements()) {// symbol:字符,字母,字for (Text.Symbol symbol : element.getSymbols()) {symbol.getText();}}}}}private void drawResult(Text.Line line) {// line的旋转角度(以度为单位,顺时针为正,范围为[-180, 180])float angle = line.getAngle() + ORIENTATION;// 检测到的文本的轴对齐边界矩形Rect boundingBox = line.getBoundingBox();// 从左上角开始顺时针方向的四个角点。不带旋转角度,如果设置过旋转角度camera.setDisplayOrientation,需要进行旋转Point[] cornerPoints = line.getCornerPoints();// 置信度float confidence = line.getConfidence();// 获取文本中的主要语言(如果有的话)String recognizedLanguage = line.getRecognizedLanguage();// 置信度太低的过滤掉if (confidence < 0.3f) {return;}for (Point cornerPoint : cornerPoints) {float[] floats = {cornerPoint.x, cornerPoint.y};matrix.mapPoints(floats);cornerPoint.x = (int) floats[0] + (WIDTH >> 1);cornerPoint.y = (int) floats[1] + (HEIGHT >> 1);}ocrArea.add(cornerPoints, line.getText());ocrArea.postInvalidate();}/*** Convert camera data into bitmap data.*/private Bitmap convertToBitmap(Camera camera, byte[] data) {int width = camera.getParameters().getPreviewSize().width;int height = camera.getParameters().getPreviewSize().height;YuvImage yuv = new YuvImage(data, ImageFormat.NV21, width, height, null);ByteArrayOutputStream stream = new ByteArrayOutputStream();yuv.compressToJpeg(new Rect(0, 0, width, height), 100, stream);return BitmapFactory.decodeByteArray(stream.toByteArray(), 0, stream.toByteArray().length);}@Overrideprotected void onResume() {super.onResume();}@Overrideprotected void onRestart() {super.onRestart();}@Overrideprotected void onDestroy() {super.onDestroy();if (recognizer != null) {recognizer.close();}}@Overridepublic void surfaceCreated(@NonNull SurfaceHolder holder) {try {camera = Camera.open(Camera.CameraInfo.CAMERA_FACING_BACK);parameters = camera.getParameters();// 旋转了90度,所以height、width互换parameters.setPictureSize(HEIGHT, WIDTH);parameters.setFocusMode(Camera.Parameters.FOCUS_MODE_CONTINUOUS_PICTURE);parameters.setPictureFormat(ImageFormat.NV21);camera.setPreviewDisplay(holder);camera.setDisplayOrientation(ORIENTATION);camera.setParameters(parameters);} catch (Exception exception) {Log.i(TAG, exception.getMessage());}}@Overridepublic void surfaceChanged(@NonNull SurfaceHolder holder, int format, int width, int height) {if (camera != null) {camera.stopPreview();camera.setPreviewCallback(null);camera.startPreview();camera.setPreviewCallback(this);ocrArea.clear();stopRecognizer = true;ocrSwitch.performClick();}}@Overridepublic void surfaceDestroyed(@NonNull SurfaceHolder holder) {if (camera != null) {camera.stopPreview();camera.setPreviewCallback(null);camera.release();}}
}

参考文章

  1. 文字识别 v2

http://www.mrgr.cn/news/82913.html

相关文章:

  • Java-数据结构-链表-高频面试题(1)
  • B. Tournament 题解与代码实现
  • 【计算机视觉】单目深度估计模型-Depth Anything-V2
  • 韩国机场WebGIS可视化集合Google遥感影像分析
  • 【通识安全】煤气中毒急救的处置
  • 爬取猫眼电影Top 100榜单:从入门到实战
  • 【华为OD机试E卷C卷D卷】跳马【C++/Java/Python】
  • Python应用指南:高德交通态势数据(一)
  • java 核心知识点——基础知识
  • 群论学习笔记
  • 关于大一上的总结
  • 【Linux】sed编辑器
  • Functions
  • 高效工作流:用Mermaid绘制你的专属流程图;如何在Vue3中导入mermaid绘制流程图
  • SQL编程语言
  • Postman接口测试05|实战项目笔记
  • 进程件通信——网络通信——TCP
  • unity-入门查漏补缺0.2.03.10
  • 计算机网络学习
  • 【算法】算法初步
  • 在ComfyUI的python_embeded下编译安装module
  • Python数据可视化-Pandas绘图
  • Postman接口测试01|接口测试基础概念、http协议、RESTful风格、接口文档
  • Servlet详解
  • 4.7 数据库:单行子查询和多行子查询
  • python学习笔记—14—函数