tesseract-ocr 文本识别开发指南
简介
Tesseract 是一款开源的光学字符识别(OCR)引擎,最初由惠普(HP)研发,后由 Google 赞助维护;据称 ChatGPT 底层也使用了 Tesseract。本人在项目中将 Tesseract 与百度的 PaddleOCR 配合使用进行文字识别,其中 Tesseract 的作用是识别文字的倾斜度和旋转角度,如下:
参考资料:
Tesseract 安装、使用、训练模型教程简介
Tesseract 安装与环境变量配置
Linux环境搭建OpenCV运行java-cv代码
tesseract-ocr 的使用
Tesseract java
Python的调用:
上面是环境的搭建和使用,下面是关于Tesseract的Python调用方法,以获取图片的旋转角度为例:
import sys
import os
# Add the parent of this file's directory, and this file's directory itself,
# to the module search path (presumably so project-local modules import
# regardless of the working directory — confirm against the project layout).
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
# Debug aid: show the effective module search path at startup.
print(sys.path)
import uvicorn
import cv2
from pytesseract import Output
import pytesseract
from fastapi import FastAPI, Request, Form, UploadFile, File
from paddleocr import PaddleOCR, PPStructure
import numpy as np
from starlette.responses import FileResponse, StreamingResponse
from fastapi.responses import JSONResponse
import uuid

# FastAPI application object that the endpoint decorators register against.
ocr_sever = FastAPI()

# Scale factors passed to rotate_bound(): 0.95 is used for OCR recognition,
# 1 is used for layout analysis.
scaling_ocr = 0.95
# Scale factor applied when rotating the image for layout analysis.
scaling_structure = 1


def rotate_bound(image, angle, scaling, border_value=(255, 255, 255)):
    """Rotate *image* about its centre without cropping the corners.

    The output canvas is enlarged to the bounding box of the rotated (and
    scaled) image; areas not covered by the source are filled with
    ``border_value``.

    Args:
        image: Image array whose first two dimensions are (height, width).
        angle: Rotation in degrees. The negated value is handed to
            ``cv2.getRotationMatrix2D`` so that a positive angle rotates
            clockwise.
        scaling: Isotropic scale factor baked into the rotation matrix.
        border_value: Fill colour for uncovered pixels (defaults to white,
            as in the original implementation).

    Returns:
        The rotated image as returned by ``cv2.warpAffine``.
    """
    height, width = image.shape[:2]
    center_x, center_y = width / 2, height / 2

    # Build the rotation matrix, then read back the (scaled) cosine and sine,
    # i.e. the rotation components of the matrix.
    matrix = cv2.getRotationMatrix2D((center_x, center_y), -angle, scaling)
    cos = np.abs(matrix[0, 0])
    sin = np.abs(matrix[0, 1])

    # Compute the new bounding dimensions of the image.
    new_width = int((height * sin) + (width * cos))
    new_height = int((height * cos) + (width * sin))

    # Adjust the rotation matrix to take the translation into account, so the
    # rotated content stays centred on the enlarged canvas.
    matrix[0, 2] += (new_width / 2) - center_x
    matrix[1, 2] += (new_height / 2) - center_y

    # Perform the actual rotation and return the image.
    return cv2.warpAffine(
        image, matrix, (new_width, new_height), borderValue=border_value
    )


# The whole pipeline combined into a single endpoint
# Model instances are expensive to construct, so they are created once on
# first use and then shared by every request instead of being rebuilt per call.
_paddle_ocr_engine = None
_layout_engine = None


def _get_paddle_ocr():
    """Return the shared PaddleOCR instance, creating it on first use."""
    global _paddle_ocr_engine
    if _paddle_ocr_engine is None:
        _paddle_ocr_engine = PaddleOCR(use_angle_cls=True, lang="ch")
    return _paddle_ocr_engine


def _get_layout_engine():
    """Return the shared PPStructure instance, creating it on first use."""
    global _layout_engine
    if _layout_engine is None:
        _layout_engine = PPStructure(
            show_log=True, type='structure', image_orientation=True
        )
    return _layout_engine


@ocr_sever.post("/imgInfos/")
async def img_infos(fileName: str = Form(...)):
    """Detect orientation, rotate, then run OCR and layout analysis on an image.

    Args:
        fileName: Name of an image located under ``./doc/imgs/`` (form field).

    Returns:
        A ``JSONResponse`` with keys ``"ocr"`` (PaddleOCR result) and
        ``"structure"`` (PPStructure regions with the ``img`` entry removed).
        Responds with status 400 if the name resolves outside the image
        directory and 404 if the image cannot be read.
    """
    print("输入文件名为:{}".format(fileName))

    # Resolve the path and make sure it stays inside the image directory, so a
    # crafted name such as "../../secret.png" cannot reach other files.
    base_dir = os.path.realpath("./doc/imgs")
    file_path = os.path.realpath(os.path.join(base_dir, fileName))
    if not file_path.startswith(base_dir + os.sep):
        return JSONResponse({"error": "invalid file name"}, status_code=400)

    # cv2.imread returns None (it does not raise) when the file is missing or
    # cannot be decoded.
    image = cv2.imread(file_path)
    if image is None:
        return JSONResponse({"error": "image not found or unreadable"},
                            status_code=404)

    # Convert OpenCV's BGR channel order to RGB before handing the image to
    # Tesseract. (No binarization is performed here.)
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = pytesseract.image_to_osd(rgb, output_type=Output.DICT)

    # Rotate the image by the angle reported by Tesseract's OSD, once per
    # downstream consumer since each uses its own scale factor.
    rotated_ocr = rotate_bound(image, angle=results["rotate"], scaling=scaling_ocr)
    rotated_structure = rotate_bound(image, angle=results["rotate"],
                                     scaling=scaling_structure)

    # Text recognition with PaddleOCR.
    result = _get_paddle_ocr().ocr(rotated_ocr, cls=True)
    print(result)

    # Layout analysis with PPStructure.
    struct = []
    for line in _get_layout_engine()(rotated_structure):
        # Drop the 'img' element before the region goes into the response;
        # tolerate regions that carry no such key.
        line.pop('img', None)
        print(line)
        struct.append(line)

    # Bundle both results into one JSON payload.
    data = {"ocr": result, "structure": struct}
    return JSONResponse(data)


# Get the rotation angle of an image
@ocr_sever.post("/imgAngle/")
async def img_angle(file: UploadFile = File(...)):
    """Store an uploaded image and return Tesseract's orientation/script data.

    Args:
        file: Uploaded image; it is written to ``./doc/imgs/`` under its
            (sanitised) file name.

    Returns:
        The dict produced by ``pytesseract.image_to_osd`` with
        ``output_type=Output.DICT``, or a ``JSONResponse`` with status 400
        when the file name is unusable or the upload is not a readable image.
    """
    print("imgAngle 输入文件名为:{}".format(file.filename))

    # The file name is client-controlled: keep only its final component so it
    # cannot point outside the image directory.
    safe_name = os.path.basename(file.filename or "")
    if safe_name in ("", ".", ".."):
        return JSONResponse({"error": "invalid file name"}, status_code=400)

    file_path = "./doc/imgs/" + safe_name
    with open(file_path, 'wb') as f:
        f.write(await file.read())

    # cv2.imread returns None (it does not raise) when decoding fails.
    image = cv2.imread(file_path)
    if image is None:
        return JSONResponse({"error": "uploaded file is not a readable image"},
                            status_code=400)

    # Convert OpenCV's BGR channel order to RGB before handing the image to
    # Tesseract. (No binarization is performed here.)
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = pytesseract.image_to_osd(rgb, output_type=Output.DICT)
    return results


# Get the image after rotation
@ocr_sever.post("/imgRotate/")
async def img_rotate(file: UploadFile = File(...)):
    """Store an uploaded image, rotate it upright and stream the result back.

    Args:
        file: Uploaded image; it is written to ``./doc/imgs/`` under its
            (sanitised) file name.

    Returns:
        A ``StreamingResponse`` carrying the rotated image encoded as JPEG.
        A ``JSONResponse`` with status 400 is returned when the file name is
        unusable or the upload is not a readable image, and with status 500
        when the rotated image cannot be written.
    """
    print("输入文件名为:{}".format(file.filename))

    # The file name is client-controlled: keep only its final component so it
    # cannot point outside the image directory.
    safe_name = os.path.basename(file.filename or "")
    if safe_name in ("", ".", ".."):
        return JSONResponse({"error": "invalid file name"}, status_code=400)

    file_path = "./doc/imgs/" + safe_name
    with open(file_path, 'wb') as f:
        f.write(await file.read())

    # cv2.imread returns None (it does not raise) when decoding fails.
    image = cv2.imread(file_path)
    if image is None:
        return JSONResponse({"error": "uploaded file is not a readable image"},
                            status_code=400)

    # Convert OpenCV's BGR channel order to RGB before handing the image to
    # Tesseract. (No binarization is performed here.)
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = pytesseract.image_to_osd(rgb, output_type=Output.DICT)

    # Rotate the image by the angle reported by Tesseract's OSD.
    rotated = rotate_bound(image, angle=results["rotate"], scaling=scaling_ocr)

    # Save under a fresh unique name so concurrent requests do not collide.
    newFilePath = "./doc/imgs/" + str(uuid.uuid1()) + ".jpg"
    if not cv2.imwrite(newFilePath, rotated):
        return JSONResponse({"error": "failed to write rotated image"},
                            status_code=500)

    # The file is always written as .jpg, so advertise it as JPEG.
    return StreamingResponse(get_file_byte(newFilePath), media_type="image/jpeg")


def get_file_byte(filename):
    """Yield the content of *filename* in chunks of up to 1024 bytes.

    Args:
        filename: Path of the file to read; per the original note it may be a
            regular file or an archive (it is read as raw bytes either way).

    Yields:
        Successive non-empty ``bytes`` chunks until end of file.
    """
    with open(filename, "rb") as f:
        # iter() with a sentinel stops at the first empty read, i.e. at EOF.
        for chunk in iter(lambda: f.read(1024), b""):
            yield chunk


# (Original note) Fetch the PaddleOCR parse result, i.e. the earlier
# interface; that endpoint is not shown here.
if __name__ == "__main__":
    # Script entry point: serve the FastAPI app with uvicorn on all interfaces.
    print('开始加载orc')
    host = '0.0.0.0'
    port = 9999
    # Pass the application object itself rather than a "module:attribute"
    # import string, so the server keeps working when this file is renamed.
    # (An import string is only needed for uvicorn's reload / multi-worker
    # modes, which are not used here.)
    uvicorn.run(ocr_sever, host=host, port=port)
其中核心代码其实就一段,其他的是对它的综合应用
pytesseract.image_to_osd(rgb, output_type=Output.DICT)