OpenCV--文檔掃描OCR識別


scan.py:

# 導入工具包
import numpy as np
import argparse
import cv2

# Command-line interface: the single required option is the path of the
# photo to scan. The parsed namespace is exposed as a plain dict in `args`.
ap = argparse.ArgumentParser()
ap.add_argument(
    "-i",
    "--image",
    required=True,
    help="Path to the image to be scanned",
)
args = vars(ap.parse_args())

def order_points(pts):
    """Order quadrilateral corners as top-left, top-right, bottom-right, bottom-left.

    Args:
        pts: Corner coordinates as (x, y) pairs. Any array-like that can be
            viewed as shape (N, 2) is accepted, including a plain list of
            pairs and an OpenCV-style contour of shape (N, 1, 2).

    Returns:
        A float32 array of shape (4, 2) whose rows are, in order, the
        top-left, top-right, bottom-right and bottom-left points (image
        coordinates, with y growing downwards).
    """
    # Normalise the input so lists and (N, 1, 2) contour arrays behave the
    # same as the (N, 2) arrays the per-row arithmetic below expects.
    pts = np.asarray(pts).reshape(-1, 2)

    # Four output corners.
    rect = np.zeros((4, 2), dtype="float32")

    # x + y is smallest at the top-left corner and largest at the
    # bottom-right corner.
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]

    # y - x is smallest at the top-right corner and largest at the
    # bottom-left corner.
    diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]

    return rect

def four_point_transform(image, pts):
    """Warp the quadrilateral described by `pts` into an upright rectangle.

    Args:
        image: Source image handed to cv2.warpPerspective.
        pts: The four corner points of the region, in any order; they are
            sorted with order_points before use.

    Returns:
        The perspective-corrected image. Its width and height are the
        longer of the two opposite edges in each direction, truncated to
        integers.
    """
    def _edge_length(p, q):
        # Euclidean distance between two (x, y) corners.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    # Corners in top-left, top-right, bottom-right, bottom-left order.
    corners = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = corners

    # Output width: the longer of the bottom and top edges.
    bottom_edge = _edge_length(bottom_right, bottom_left)
    top_edge = _edge_length(top_right, top_left)
    out_width = max(int(bottom_edge), int(top_edge))

    # Output height: the longer of the right and left edges.
    right_edge = _edge_length(top_right, bottom_right)
    left_edge = _edge_length(top_left, bottom_left)
    out_height = max(int(right_edge), int(left_edge))

    # Where each ordered corner lands in the output image.
    target = np.array(
        [
            [0, 0],
            [out_width - 1, 0],
            [out_width - 1, out_height - 1],
            [0, out_height - 1],
        ],
        dtype="float32",
    )

    # Build the homography and apply it.
    matrix = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(image, matrix, (out_width, out_height))

def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Resize `image` to a given width or height, keeping its aspect ratio.

    Args:
        image: Array whose first two dimensions are (height, width).
        width: Target width in pixels. Takes precedence when both `width`
            and `height` are supplied.
        height: Target height in pixels; used only when `width` is None.
        inter: Interpolation flag forwarded to cv2.resize.

    Returns:
        The input image itself when neither dimension is given, otherwise
        the result of cv2.resize.
    """
    src_h, src_w = image.shape[:2]

    if width is None and height is None:
        return image

    if width is not None:
        # Scale by width; the height follows from the aspect ratio.
        scale = width / float(src_w)
        target_size = (width, int(src_h * scale))
    else:
        # Scale by height; the width follows from the aspect ratio.
        scale = height / float(src_h)
        target_size = (int(src_w * scale), height)

    return cv2.resize(image, target_size, interpolation=inter)

# Load the input photo.
image = cv2.imread(args["image"])
if image is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # rather than raising, so fail here with a clear message.
    raise SystemExit("Could not read image: {}".format(args["image"]))

# Detection runs on a copy scaled to 500 px high, so contour coordinates
# have to be multiplied by this ratio to map back onto the original.
ratio = image.shape[0] / 500.0
orig = image.copy()

image = resize(orig, height=500)

# Pre-processing: grayscale -> Gaussian blur -> Canny edges.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray, (5, 5), 0)
edged = cv2.Canny(gray, 75, 200)

# Show the pre-processing result.
print("STEP 1: 邊緣檢測")
cv2.imshow("Image", image)
cv2.imshow("Edged", edged)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Contour detection. findContours returns (image, contours, hierarchy) in
# OpenCV 3.x but (contours, hierarchy) in 2.x/4.x; the contour list is
# always the second-to-last element, so index from the end.
cnts = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
# Keep only the five largest contours by area.
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]

# Walk the candidates, largest first, looking for a quadrilateral.
screenCnt = None
for c in cnts:
    # Approximate the contour with a polygon:
    #   c                 - the input point set
    #   0.02 * perimeter  - epsilon, the maximum distance allowed between
    #                       the original contour and its approximation
    #   True              - the curve is closed
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)

    # Take the first approximation that has exactly four vertices.
    if len(approx) == 4:
        screenCnt = approx
        break

if screenCnt is None:
    # Without this guard the code below would fail with a NameError.
    raise SystemExit("No four-point contour found; cannot locate the document outline.")

# Show the detected outline.
print("STEP 2: 獲取輪廓")
cv2.drawContours(image, [screenCnt], -1, (0, 255, 0), 2)
cv2.imshow("Outline", image)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Perspective transform on the full-resolution original, with the contour
# coordinates scaled back up by `ratio`.
warped = four_point_transform(orig, screenCnt.reshape(4, 2) * ratio)

# Binarise the warped page and save it for the OCR step.
warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
ref = cv2.threshold(warped, 100, 255, cv2.THRESH_BINARY)[1]
cv2.imwrite('scan.jpg', ref)

# Show the final result.
print("STEP 3: 變換")
cv2.imshow("Original", resize(orig, height=650))
cv2.imshow("Scanned", resize(ref, height=650))
cv2.waitKey(0)

效果:

利用tesseract工具識別出字符:

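# Setup:
# 1. Download and install Tesseract: https://digi.bib.uni-mannheim.de/tesseract/
# 2. Add the install directory (e.g. E:\Program Files (x86)\Tesseract-OCR) to the PATH environment variable.
# 3. Run `tesseract -v` to check that the installation works.
# 4. Run `tesseract XXX.png <outputbase>` to OCR an image from the command line (the text is written to <outputbase>.txt).
# 5. pip install pytesseract
# 6. If pytesseract cannot find the executable, open pytesseract.py (under Anaconda's Lib/site-packages/pytesseract)
#    and change tesseract_cmd to the absolute path of the tesseract executable.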
from PIL import Image
import pytesseract
import cv2
import os

# Pre-processing applied before OCR: 'blur' (median filter) or
# 'thresh' (Otsu binarisation).
preprocess = 'blur'

image = cv2.imread('scan.jpg')
if image is None:
    # cv2.imread returns None instead of raising when the file is
    # missing or unreadable.
    raise SystemExit("Could not read 'scan.jpg'")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# The two modes are mutually exclusive.
if preprocess == "thresh":
    gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
elif preprocess == "blur":
    gray = cv2.medianBlur(gray, 3)

# Hand the pre-processed image to Tesseract through a temporary PNG named
# after the current process id.
filename = "{}.png".format(os.getpid())
if not cv2.imwrite(filename, gray):
    raise SystemExit("Could not write temporary file: {}".format(filename))

try:
    # The context manager closes the file handle before the file is
    # deleted; the finally clause removes the temp file even if OCR fails.
    with Image.open(filename) as ocr_input:
        text = pytesseract.image_to_string(ocr_input)
finally:
    os.remove(filename)
print(text)

cv2.imshow("Image", image)
cv2.imshow("Output", gray)
cv2.waitKey(0)

效果:

we owe oak wk ome owe ow wo Sk we %o %o %K

 

WHOLE FOODS MARKET - WESTPORT,.CT 06880
399 POST RD WEST - (203) 227-6858

64
365
365

365

BACULN LS
BACON LS
BACON LS
BACON iS
BRO TH CHIC

FLOUR ALMUNU
CHKN BRST BNLSS SK
HEAVY CREAM

BALSMC REDUCT

BEEF

GRND
JUICE COF CRSHEW

85/15

L.

DOCS PINT QORGAK IC
HNY ALMOND Bui TR

* x ## TAX

. 00

BAL

NP
NP
NP
NP
NP
NP
NP
NP
NP
NP
NP
NP
NP

4 99
4.99
4.99
1 39
2.19
1.99
. 80
. 39
. 49

tl &

on

8.99

14.49

9.99
101.33

m

"Ti

m n m


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM