經過前面的步驟,現在已經得到一個407 * 407的正方形圖片,但是這個圖片不一定是正確的方向(正確方向下,三個定位點應位於左上、左下、右上三個角)。我們需要對這個圖片進行旋正處理。
首先對圖片進行一個二值化處理,在這個步驟,考慮到圖片可能因為光照的原因,二維碼部分存在較大亮度差,導致灰度值差異較大,我選擇使用自適應閾值二值化方式進行處理
# Adaptive mean-threshold binarisation: 71x71 neighbourhood, constant C = -20.
# Mean-based adaptive thresholding copes with the large brightness variation
# that uneven lighting causes across the printed QR code.
threshed_image = cv2.adaptiveThreshold(image, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 71,
-20)

然后找定位圖形,按照二維碼生成規范,每個二維碼圖形應當有三個完整的定位圖形,分別位於左上、左下、右上角。
按照之前看的網上的方案,是通過輪廓關系來尋找,即:
1.定位圖形的輪廓具有兩層子輪廓
2.子輪廓/父輪廓面積比在0.5左右((5 * 5) / (7 * 7))
但是這個方案對圖像要求很高,因為很多二維碼圖形本身是斷續的,無法找到連續的輪廓,所以這個方案不是很實用。
《干貨 | 史上最全的支付寶二維碼掃碼優化技術方案》:https://yq.aliyun.com/articles/599505?utm_content=m_1000000228
還有一種方案是支付寶的同學分享的,基於掃描線段關系,按照二維碼的規范,生成的定位圖像的邊寬度比為1:1:3:1:1,如果可以找到符合這個線段寬度比例且顏色組合正確,基本可以認定是找到了定位點。但是這種方案在實際的計算過程中遇到了一個問題,就是二維碼的邊長隨機性太大,按照預期,每個邊長應當在11px左右,但是實際的掃描過程中,經常出現20個px的超寬邊,也出現過5px的超窄邊,使用寬度比例的方案效果欠佳,代碼實現如下:
# Thresholds for the 1:1:3:1:1 finder-pattern test. With ~11px modules on a
# 407px code, each "1" segment should be 5-20px, the "3" segment 25-50px and
# the whole pattern 60-100px; the ratio windows are deliberately loose.
_RATE_1_1_MIN, _RATE_1_1_MAX = 0.45, 2.2
_RATE_3_1_MIN, _RATE_3_1_MAX = 2, 5
_WIDTH_1_MIN, _WIDTH_1_MAX = 5, 20
_WIDTH_3_MIN, _WIDTH_3_MAX = 25, 50
_WIDTH_7_MIN, _WIDTH_7_MAX = 60, 100
# Runs of length <= this are treated as print noise and bridged over.
_NOISE_LENGTH = 1


def _line_runs(pixels):
    """Split one scan line of 0/255 pixels into [begin, end, color] runs.

    Fix over the first draft: the trailing run is now appended as well
    (previously a run that reached the end of the line without a colour
    change was silently dropped).
    """
    runs = []
    begin = 0
    color = pixels[0]
    for idx in range(1, len(pixels)):
        if pixels[idx] != color:
            runs.append([begin, idx - 1, color])
            begin = idx
            color = pixels[idx]
    runs.append([begin, len(pixels) - 1, color])
    return runs


def _drop_noise(runs):
    """Filter out noise runs (length <= _NOISE_LENGTH), bridging neighbours.

    NOTE(review): when run k is noise, the previously kept run is stretched
    to the end of run k+1, yet run k+1 is still appended on the next pass.
    This mirrors the original filter exactly — confirm before changing.
    """
    kept = []
    for k in range(len(runs)):
        if k == 0:
            kept.append(runs[k])
        elif runs[k][1] - runs[k][0] <= _NOISE_LENGTH:
            if k != len(runs) - 1:
                kept[-1][1] = runs[k + 1][1]
        else:
            kept.append(runs[k])
    return kept


def _pattern_hits(runs):
    """Count windows of five consecutive runs that match black:white:black:
    white:black with widths close to 1:1:3:1:1."""
    hits = 0
    for k in range(len(runs) - 4):
        w = [runs[k + m][1] - runs[k + m][0] for m in range(5)]
        c = [runs[k + m][2] for m in range(5)]
        total = w[0] + w[1] + w[2] + w[3] + w[4]
        # Colours must alternate black, white, black, white, black.
        if c[0] != 0 or c[1] != 255 or c[2] != 0 or c[3] != 255 or c[4] != 0:
            continue
        # Guard the ratio divisions (the original could divide by zero).
        if w[0] == 0 or w[1] == 0:
            continue
        # Width ratios: 1:1, 3:1, 1:1, and last-to-first 1:1.
        if not _RATE_1_1_MIN < w[0] / w[1] < _RATE_1_1_MAX:
            continue
        if not _RATE_3_1_MIN < w[2] / w[1] < _RATE_3_1_MAX:
            continue
        if not _RATE_1_1_MIN < w[3] / w[1] < _RATE_1_1_MAX:
            continue
        if not _RATE_1_1_MIN < w[4] / w[0] < _RATE_1_1_MAX:
            continue
        # Absolute width limits.
        if not (_WIDTH_1_MIN <= w[0] <= _WIDTH_1_MAX and
                _WIDTH_1_MIN <= w[1] <= _WIDTH_1_MAX and
                _WIDTH_1_MIN <= w[3] <= _WIDTH_1_MAX and
                _WIDTH_1_MIN <= w[4] <= _WIDTH_1_MAX and
                _WIDTH_3_MIN <= w[2] <= _WIDTH_3_MAX and
                _WIDTH_7_MIN <= total <= _WIDTH_7_MAX):
            continue
        hits += 1
    return hits


def scan_location(image_cut):
    """Detect a QR finder pattern in a 100x100 corner crop by scan lines.

    Scans rows (horizontal pass) and columns (vertical pass) for the
    1:1:3:1:1 black/white run signature of a finder pattern.

    Returns:
        "all"  -- both passes found more than 10 matching lines,
        "half" -- exactly one pass did,
        "none" -- neither did.

    Fixes over the first draft: the vertical pass previously reused the
    stale ``line_all_width`` from the horizontal pass instead of computing
    its own total width; the unused debug cross-hair drawing and the unused
    ``last_node_num`` variable were removed; both passes now share one
    implementation.
    """
    def _pass(lines):
        # A pass succeeds once more than 10 scan lines contain the pattern.
        hits = 0
        for pixels in lines:
            runs = _drop_noise(_line_runs(pixels))
            # Fewer than five runs cannot contain the five-segment pattern.
            if len(runs) < 5:
                continue
            hits += _pattern_hits(runs)
            if hits > 10:
                return True
        return False

    # Horizontal pass scans rows 0..98 (the original skipped the last row;
    # preserved), vertical pass scans all 100 columns.
    horizontal_found = _pass(
        [[image_cut[i][j] for j in range(100)] for i in range(0, 99)])
    vertical_found = _pass(
        [[image_cut[i][j] for i in range(100)] for j in range(0, 100)])
    if horizontal_found and vertical_found:
        return "all"
    if horizontal_found or vertical_found:
        return "half"
    return "none"
由於線段經常出現斷點,每次掃描還需要考慮當前的分割是真正的白色線段還是發生的白色噪聲,噪聲還需要將前后線段續接。並且由於線段寬度不確定,寫了很多預定義的閾值,整體代碼結構較亂,而且需要經常調閾值來適配。
換一個思考方式,我們知道我們需要在四個邊角去找定位圖形,我先把四個邊角切分出來,作為待掃描目標
定位圖形具有以下特征:中心是一個黑色方塊,中圈是一個白色方框,外圈是一個黑色方框。
從目前搜集到的發票來看,黑色部分大都存在斷線的情況,但是白色區域沒有出現被黑色部分的溢出占滿的情況,所以,當我們掃描到一個方框,邊距合適,且線條顏色為純白時,基本可以認定是中心中圈的白色方框。
此時基本可以認定當前是一個定位點。但是由於發票存在很多定位點缺損的情況,即從左側開始打印的時候,左側的二維碼部分存在缺失,導致定位點缺損,這種情況我們需要進行特殊處理,所以,我們還需要判定,當前這個定位點是一個完整的定位點還是一個殘損的定位點。
完整的定位點還滿足一個條件,即中圈白框外面必然存在一個黑色方框,所以,當我們以白框坐標為基准,掃描周邊的時候,如果可以掃描到一個線框,平均顏色趨近於黑色,我們就可以認定為這是一個完整的定位點。
當掃描到完整定位點時,返回all,掃描到殘損定位點時,返回half,未掃描到時,返回none
構造多個白框寬度進行掃描,避免由於圖像打印不標准而掃描寬度一成不變導致的漏掃
# Scan for a locator point via its white ring, trying several window sizes.
def scan_location_white_edge(image_cut):
    """Look for a finder pattern by its white ring at several window widths.

    Printing is not perfectly to scale, so a single ring width would miss
    patterns; widths 48, 45, 42, 39 and 36 are tried in turn and the first
    non-"none" result wins.

    Returns "all" (complete locator), "half" (damaged locator) or "none".
    """
    for window_width in (48, 45, 42, 39, 36):
        scan_result = scan_location_white_edge_once(image_cut, window_width)
        if scan_result == "all" or scan_result == "half":
            return scan_result
    return "none"
單次掃描,每次掃描到白框后,都基於當前的白框寬度,加上5種間隔寬度進行掃描,避免漏掃
# Scan for a locator point via its white ring, single window width.
def scan_location_white_edge_once(image_cut, window_width):
    """Single-pass white-ring locator scan at one window width.

    Slides a ``window_width``-sized square over the 100x100 crop until its
    1px border is entirely white (the locator's middle white ring). Once
    found, searches the whole crop for a larger square whose 1px border is
    dark on average (the locator's outer black ring) at five candidate gaps.

    Returns "all" when both rings are found, "half" when only the white
    ring is found, "none" otherwise.
    """
    def border_sum(top, left, size):
        # Sum of the 1px border ring of a size x size window at (top, left).
        whole = image_cut[top:top + size, left:left + size].sum()
        core = image_cut[top + 1:top + size - 1, left + 1:left + size - 1].sum()
        return whole - core

    # Darkness threshold derived from overall brightness.
    # NOTE(review): admittedly crude, per the original author's own comment.
    darkness_limit = max(image_cut.mean() - 80, 15)
    ring_pixels = window_width * window_width - (window_width - 2) * (window_width - 2)
    white_ring_sum = ring_pixels * 255
    for top in range(0, 100 - window_width):
        for left in range(0, 100 - window_width):
            if border_sum(top, left, window_width) != white_ring_sum:
                continue
            # White ring found; now hunt for the surrounding black ring.
            # Gap order (11, 8, 14, 5, 17) tries the most likely spacing first.
            for gap in (11, 8, 14, 5, 17):
                size = window_width + gap * 2
                for row in range(0, 100 - size):
                    for col in range(0, 100 - size):
                        # Border mean below the limit (i.e. at most ~1/5 of the
                        # ring is white) means we hit the outer black ring.
                        if border_sum(row, col, size) / (size * 4) < darkness_limit:
                            return "all"
            return "half"
    return "none"
當掃描出所有的定位點之后,根據定位點的關系進行圖像旋轉處理
根據發票二維碼打印的實際情況,一般會出現以下幾種情況:
三個all,即左上,左下,右上
兩個half一個all,左上、左下的half,右上的all
一個all,右上的all
除了這幾種情況,其他的情況都屬於不確定,統一按照一個規則進行圖像旋轉
根據以上收集到的定位點情況,進行圖像旋轉,由於使用opencv的旋轉api可能會導致出現1px的黑邊或者白邊,所以繼續使用投影變換進行旋轉
# Rotate the image so the locator points sit at top-left, bottom-left, top-right.
def upright_image(self, image):
    """Rotate a 407x407 QR crop upright based on its detected locator points.

    Args:
        image: 407x407 grayscale crop containing the QR code.

    Returns:
        (uprighted_image, defect_flag) -- the rotated image and True when the
        locators indicate a damaged/incomplete print.

    Bug fix: the 270-degree branch previously duplicated the 90-degree
    target corners, so 270-degree cases were rotated the wrong way; it is
    now the inverse of the 90-degree mapping.
    """
    # Scan the four corners for locator points ("all" = complete, "half" = damaged).
    (all_array, half_array) = loc.scan_all_location(image)
    rotation_angle = 0
    defect_flag = False
    # Three locators found in any mix: the empty corner fixes the orientation
    # (an upright code has no locator at bottom-right).
    if len(all_array) + len(half_array) == 3:
        if "left_top" not in all_array and "left_top" not in half_array:
            rotation_angle = 180
        elif "left_bottom" not in all_array and "left_bottom" not in half_array:
            rotation_angle = 90
        elif "right_top" not in all_array and "right_top" not in half_array:
            rotation_angle = 270
    # Exactly one locator found: assume it belongs at top-right and rotate it there.
    elif len(all_array) == 1 or len(all_array) == 0 and len(half_array) == 1:
        location = all_array[0] if len(all_array) == 1 else half_array[0]
        if location == "left_top":
            rotation_angle = 270
        elif location == "left_bottom":
            rotation_angle = 180
        else:
            # "right_bottom" — and the fallback case — both rotate by 90.
            rotation_angle = 90
    # Any damaged locator, or only a single complete one, marks the code defective.
    if len(half_array) != 0 or len(all_array) == 1:
        defect_flag = True
    if self.trace_image:
        cv2.imwrite(self.trace_path + "401_rotation_source_" + self.image_name, image)
    # cv2.getRotationMatrix2D/warpAffine can introduce a 1px black/white border,
    # so rotate via a perspective transform over the exact corner coordinates.
    # Corner order: top-left, bottom-left, bottom-right, top-right (x, y).
    source_position = np.float32([[0, 0], [0, 406], [406, 406], [406, 0]])
    if rotation_angle == 0:
        uprighted_image = image
    else:
        if rotation_angle == 90:
            target_position = np.float32([[0, 406], [406, 406], [406, 0], [0, 0]])
        elif rotation_angle == 180:
            # Applying the 90-degree mapping twice yields this corner order.
            target_position = np.float32([[406, 406], [406, 0], [0, 0], [0, 406]])
        else:
            # 270 degrees: inverse of the 90-degree mapping (previously a
            # copy-paste duplicate of the 90-degree corners).
            target_position = np.float32([[406, 0], [0, 0], [0, 406], [406, 406]])
        transform = cv2.getPerspectiveTransform(source_position, target_position)
        uprighted_image = cv2.warpPerspective(image, transform, (407, 407))
    if self.trace_image:
        cv2.imwrite(self.trace_path + "401_rotation_target_" + self.image_name, uprighted_image)
    return uprighted_image, defect_flag
旋正后的圖片如圖所示:

至此,我們已經得到了一個旋正的正方形,可以基於這個圖形進行一些識別算法的編寫了
