EasyPR源碼剖析（9）：字符識別

本文轉載自查看原文 2017-08-17 22:28 2126

在上一篇文章的介紹中，我們已經通過相應的字符分割方法，將車牌區域進行分割，得到7個分割字符圖塊，接下來要做的就是將字符圖塊放入訓練好的神經網絡模型，通過模型來預測每個圖塊所表示的具體字符。神經網絡的介紹和訓練過程我們將在下一節中具體介紹，本節主要介紹字符特征的提取，和如何通過訓練好的神經網絡模型來進行字符的識別。

字符識別主要是通過類CharsIdentify 來進行，對於中文字符和非中文字符，分別采取了不同的策略，訓練得到的ANN模型也不一樣，中文字符的識別主要使用 identifyChinese 來處理，非中文字符的識別主要采用 identify 來處理。另外，類CharsIdentify采用了單例模式，具體的初始化代碼和構造函數如下：

 1   CharsIdentify* CharsIdentify::instance_ = nullptr;
 2 
 3   CharsIdentify* CharsIdentify::instance() {
 4     if (!instance_) {
 5       instance_ = new CharsIdentify;
 6     }
 7     return instance_;
 8   }
 9 
10   CharsIdentify::CharsIdentify() {
11     ann_ = ml::ANN_MLP::load<ml::ANN_MLP>(kDefaultAnnPath);
12     annChinese_ = ml::ANN_MLP::load<ml::ANN_MLP>(kChineseAnnPath);
13     kv_ = std::shared_ptr<Kv>(new Kv);
14     kv_->load("etc/province_mapping");
15   }
16 
17   void CharsIdentify::LoadModel(std::string path) {
18     if (path != std::string(kDefaultAnnPath)) {
19 
20       if (!ann_->empty())
21         ann_->clear();
22 
23       ann_ = ml::ANN_MLP::load<ml::ANN_MLP>(path);
24     }
25   }
26 
27   void CharsIdentify::LoadChineseModel(std::string path) {
28     if (path != std::string(kChineseAnnPath)) {
29 
30       if (!annChinese_->empty())
31         annChinese_->clear();
32 
33       annChinese_ = ml::ANN_MLP::load<ml::ANN_MLP>(path);
34     }
35   }

View Code

這邊單例模式只考慮了單線程情況，對於多線程的話，需要加入雙重鎖定。此處處理中文字符和非中文字符，分別加載了不同的ANN模型文件，ANN模型通過opencv 中機器學習中自帶的神經網絡模型 ml::ANN_MLP 來實現。

字符特征獲取

通過神經網絡對字符圖塊進行判別，首先需要獲取字符圖塊的特征，字符特征的獲取，主要通過 charFeatures 函數來實現。具體的函數代碼如下所示：

 1 Mat charFeatures(Mat in, int sizeData) {
 2   const int VERTICAL = 0;
 3   const int HORIZONTAL = 1;
 4 
 5   // cut the cetner, will afect 5% perices.
 6   Rect _rect = GetCenterRect(in);
 7   Mat tmpIn = CutTheRect(in, _rect);
 8   //Mat tmpIn = in.clone();
 9 
10   // Low data feature
11   Mat lowData;
12   resize(tmpIn, lowData, Size(sizeData, sizeData));
13 
14   // Histogram features
15   Mat vhist = ProjectedHistogram(lowData, VERTICAL);
16   Mat hhist = ProjectedHistogram(lowData, HORIZONTAL);
17 
18   // Last 10 is the number of moments components
19   int numCols = vhist.cols + hhist.cols + lowData.cols * lowData.cols;
20 
21   Mat out = Mat::zeros(1, numCols, CV_32F);
22   // Asign values to
23 
24   int j = 0;
25   for (int i = 0; i < vhist.cols; i++) {
26     out.at<float>(j) = vhist.at<float>(i);
27     j++;
28   }
29   for (int i = 0; i < hhist.cols; i++) {
30     out.at<float>(j) = hhist.at<float>(i);
31     j++;
32   }
33   for (int x = 0; x < lowData.cols; x++) {
34     for (int y = 0; y < lowData.rows; y++) {
35       out.at<float>(j) += (float)lowData.at <unsigned char>(x, y);
36       j++;
37     }
38   }
39 
40   //std::cout << out << std::endl;
41 
42   return out;
43 }

View Code

對於中文字符和英文字符，默認的圖塊大小是不一樣的，中文字符默認是 20*20，非中文默認是10*10。

GetCenterRect 函數主要用於獲取字符的邊框，分別從上、下、左、右四個方向向內掃描，查找第一個出現字符像素（灰度值大於20）的行或列；
CutTheRect 函數裁剪原圖，即將字符移動到圖像的中間位置，通過這一步的操作，可將字符識別的准確率提高5%左右；
ProjectedHistogram 函數用於獲取投影直方圖：統計每一列（垂直方向）或每一行（水平方向）中灰度值大於20的像素個數，再除以其中的最大值，歸一化到0-1區間范圍內；

GetCenterRect 函數具體代碼如下：

 1 Rect GetCenterRect(Mat &in) {
 2   Rect _rect;
 3 
 4   int top = 0;
 5   int bottom = in.rows - 1;
 6 
 7   // find the center rect
 8 
 9   for (int i = 0; i < in.rows; ++i) {
10     bool bFind = false;
11     for (int j = 0; j < in.cols; ++j) {
12       if (in.data[i * in.step[0] + j] > 20) {
13         top = i;
14         bFind = true;
15         break;
16       }
17     }
18     if (bFind) {
19       break;
20     }
21 
22   }
23   for (int i = in.rows - 1;
24   i >= 0;
25   --i) {
26     bool bFind = false;
27     for (int j = 0; j < in.cols; ++j) {
28       if (in.data[i * in.step[0] + j] > 20) {
29         bottom = i;
30         bFind = true;
31         break;
32       }
33     }
34     if (bFind) {
35       break;
36     }
37 
38   }
39 
40 
41   int left = 0;
42   int right = in.cols - 1;
43   for (int j = 0; j < in.cols; ++j) {
44     bool bFind = false;
45     for (int i = 0; i < in.rows; ++i) {
46       if (in.data[i * in.step[0] + j] > 20) {
47         left = j;
48         bFind = true;
49         break;
50       }
51     }
52     if (bFind) {
53       break;
54     }
55 
56   }
57   for (int j = in.cols - 1;
58   j >= 0;
59   --j) {
60     bool bFind = false;
61     for (int i = 0; i < in.rows; ++i) {
62       if (in.data[i * in.step[0] + j] > 20) {
63         right = j;
64         bFind = true;
65 
66         break;
67       }
68     }
69     if (bFind) {
70       break;
71     }
72   }
73 
74   _rect.x = left;
75   _rect.y = top;
76   _rect.width = right - left + 1;
77   _rect.height = bottom - top + 1;
78 
79   return _rect;
80 }

View Code

CutTheRect 函數具體代碼如下：

 1 Mat CutTheRect(Mat &in, Rect &rect) {
 2   int size = in.cols;  // (rect.width>rect.height)?rect.width:rect.height;
 3   Mat dstMat(size, size, CV_8UC1);
 4   dstMat.setTo(Scalar(0, 0, 0));
 5 
 6   int x = (int) floor((float) (size - rect.width) / 2.0f);
 7   int y = (int) floor((float) (size - rect.height) / 2.0f);
 8 
 9   for (int i = 0; i < rect.height; ++i) {
10 
11     for (int j = 0; j < rect.width; ++j) {
12       dstMat.data[dstMat.step[0] * (i + y) + j + x] =
13           in.data[in.step[0] * (i + rect.y) + j + rect.x];
14     }
15   }
16 
17   //
18   return dstMat;
19 }

View Code

ProjectedHistogram 函數代碼如下：

 1 float countOfBigValue(Mat &mat, int iValue) {
 2   float iCount = 0.0;
 3   if (mat.rows > 1) {
 4     for (int i = 0; i < mat.rows; ++i) {
 5       if (mat.data[i * mat.step[0]] > iValue) {
 6         iCount += 1.0;
 7       }
 8     }
 9     return iCount;
10 
11   } else {
12     for (int i = 0; i < mat.cols; ++i) {
13       if (mat.data[i] > iValue) {
14         iCount += 1.0;
15       }
16     }
17 
18     return iCount;
19   }
20 }
21 
22 Mat ProjectedHistogram(Mat img, int t) {
23   int sz = (t) ? img.rows : img.cols;
24   Mat mhist = Mat::zeros(1, sz, CV_32F);
25 
26   for (int j = 0; j < sz; j++) {
27     Mat data = (t) ? img.row(j) : img.col(j);
28 
29     mhist.at<float>(j) = countOfBigValue(data, 20);
30   }
31 
32   // Normalize histogram
33   double min, max;
34   minMaxLoc(mhist, &min, &max);
35 
36   if (max > 0)
37     mhist.convertTo(mhist, -1, 1.0f / max, 0);   //歸一化 0-1
38 
39   return mhist;
40 }

View Code

通過上述代碼可知，非中文字符和中文字符獲得的字符特征個數是不同的，非中文字符features個數為 10+10+10*10=120，中文字符features個數為 20+20+20*20=440。

字符識別

通過上述函數獲取字符特征之后，可以通過神經網絡模型對車牌字符進行識別，具體的識別函數如下所示：

 1   int CharsIdentify::classify(cv::Mat f, float& maxVal, bool isChinses){
 2     int result = -1;
 3 
 4     cv::Mat output(1, kCharsTotalNumber, CV_32FC1);
 5     ann_->predict(f, output);
 6 
 7     maxVal = -2.f;
 8     if (!isChinses) {
 9       result = 0;
10       for (int j = 0; j < kCharactersNumber; j++) {
11         float val = output.at<float>(j);
12         // std::cout << "j:" << j << "val:" << val << std::endl;
13         if (val > maxVal) {
14           maxVal = val;
15           result = j;
16         }
17       }
18     }
19     else {
20       result = kCharactersNumber;
21       for (int j = kCharactersNumber; j < kCharsTotalNumber; j++) {
22         float val = output.at<float>(j);
23         //std::cout << "j:" << j << "val:" << val << std::endl;
24         if (val > maxVal) {
25           maxVal = val;
26           result = j;
27         }
28       }
29     }
30     //std::cout << "maxVal:" << maxVal << std::endl;
31     return result;
32   }

View Code

ann_為之前加載得到的神經網絡模型，直接調用其 predict() 函數，即可得到輸出矩陣 output，輸出矩陣中最大值所在的下標即為識別出的車牌字符的索引，索引取值為0-64，共65個，各索引對應的字符如下所示：

// Character labels in index order, 65 entries in total:
//   [0, 10)   digits
//   [10, 34)  upper-case letters, with "I" and "O" left out
//   [34, 65)  "zh_"-prefixed keys for the Chinese characters
static const char* kChars[] = {
  // digits (10)
  "0", "1", "2", "3", "4",
  "5", "6", "7", "8", "9",

  // letters (24): no "I", no "O"
  "A", "B", "C", "D", "E", "F", "G", "H",
  "J", "K", "L", "M", "N",
  "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",

  // Chinese character keys (31)
  "zh_cuan",  "zh_e",    "zh_gan",  "zh_gan1",
  "zh_gui",   "zh_gui1", "zh_hei",  "zh_hu",
  "zh_ji",    "zh_jin",  "zh_jing", "zh_jl",
  "zh_liao",  "zh_lu",   "zh_meng", "zh_min",
  "zh_ning",  "zh_qing", "zh_qiong", "zh_shan",
  "zh_su",    "zh_sx",   "zh_wan",  "zh_xiang",
  "zh_xin",   "zh_yu",   "zh_yu1",  "zh_yue",
  "zh_yun",   "zh_zang", "zh_zhe"
};

其中26個英文字母中，因為I 和 O容易和數字的 1和0 混淆，因此被去除了，后面31個中文字符分別對應中國的31個行政區域（港澳台暫不考慮）。將識別的各個字符整體輸出，就得到了最終的結果。

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 EasyPR源碼剖析（8）：字符分割 EasyPR源碼剖析（1）：概述 EasyPR源碼剖析（2）：車牌定位 EasyPR源碼剖析（6）：車牌判斷之LBP特征 EasyPR源碼剖析（5）：車牌定位之偏斜扭轉 EasyPR源碼剖析（7）：車牌判斷之SVM EasyPR源碼剖析（3）：車牌定位之顏色定位 halcon學習_字符識別1 車牌識別LPR（八）-- 字符識別字符識別OCR原理及應用實現