参考博客:https://blog.csdn.net/wang_hugh/article/details/80760940
环境配置:
pip install pillow pytesseract opencv-python
解决错误:
pytesseract.pytesseract.TesseractNotFoundError: tesseract is not installed or it's not in your path
办法:
在 pytesseract.py 中设置:tesseract_cmd = 'D:/Program Files/Tesseract-OCR/tesseract.exe'
安装 tesseract.exe 时,安装路径必须与上面 tesseract_cmd 中的路径保持一致;否则还需要相应修改 TESSDATA_PREFIX 环境变量(即原文所说的 tess_prefix)。
代码如下:
import cv2 as cv
from PIL import Image
import pytesseract #要配置tesseract-ocr 引擎的
def recognize_text(image=None):
    """Clean up a captcha image and run Tesseract OCR on it.

    The image is converted to grayscale, binarised with Otsu's threshold
    (inverted, so dark strokes become white foreground), opened twice with
    thin rectangular kernels to strip line noise, inverted back to dark text
    on a white background, shown in a window, and finally passed to
    pytesseract.

    Args:
        image: BGR image as returned by ``cv.imread``. When omitted, the
            module-level ``src`` variable is used, which keeps the original
            zero-argument call ``recognize_text()`` working.

    Returns:
        The string recognised by ``pytesseract.image_to_string``.
    """
    if image is None:
        # Backward compatibility: the original implementation always read
        # the global ``src`` set by the script entry point.
        image = src

    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    # Otsu picks the threshold automatically, so the explicit value 0 is ignored.
    _, binary = cv.threshold(
        gray, 0, 255, cv.THRESH_BINARY_INV | cv.THRESH_OTSU
    )

    # Opening with a 1x6 (width x height) kernel removes foreground pieces
    # shorter than 6 px vertically, i.e. thin horizontal interference lines.
    kernel = cv.getStructuringElement(cv.MORPH_RECT, (1, 6))
    without_h_lines = cv.morphologyEx(binary, cv.MORPH_OPEN, kernel)

    # Opening with a 5x1 kernel removes pieces narrower than 5 px
    # horizontally, i.e. thin vertical lines.
    kernel = cv.getStructuringElement(cv.MORPH_RECT, (5, 1))
    open_out = cv.morphologyEx(without_h_lines, cv.MORPH_OPEN, kernel)

    # Invert in place: black background becomes white background.
    cv.bitwise_not(open_out, open_out)
    cv.imshow('open_out', open_out)

    # pytesseract is given a PIL image here, so convert from the numpy array.
    text_image = Image.fromarray(open_out)
    text = pytesseract.image_to_string(text_image)
    print("This OK:%s"%text)
    return text
if __name__ == '__main__':
    # Script entry point: load the captcha, display it, run OCR, then wait
    # for a key press before closing the preview windows.
    src = cv.imread("yzm.jpg")
    if src is None:
        # cv.imread signals a missing or unreadable file by returning None
        # instead of raising, so fail here with a clear message rather than
        # with an obscure error inside cv.imshow.
        raise SystemExit("Could not read image file: yzm.jpg")
    cv.imshow("src", src)
    recognize_text()  # reads the module-level ``src`` assigned above
    cv.waitKey(0)
    cv.destroyAllWindows()
输入: