// Create the OCR engine, loading language data from the ./tessdata folder.
// Chinese ("chi_sim") is selected here. Alternatives from the original snippet:
//   English:  new TesseractEngine("./tessdata", "eng", EngineMode.TesseractAndCube)
//   Japanese: new TesseractEngine("./tessdata", "jpn")
TesseractEngine ocr = new TesseractEngine("./tessdata", "chi_sim");
源碼下載:https://download.csdn.net/download/horseroll/10739546 源碼下包含部分語言包,所以文件比較大
先上效果圖。已測試中文、英文、日語的識別;其他語言也可以,只要下載相應的語言包即可。具體的操作與使用方法後面都有說明。
1.首先在Nuget中搜索Tesseract,下載到項目中
2.下載相應的語言包放至Debug/tessdata文件夾下,Tesseract語言包下載地址:https://github.com/tesseract-ocr/tesseract/wiki/Data-Files#data-files-for-version-302
3.代碼操作
首先初始化 TesseractEngine 類,並設置識別語言(初始化代碼見文首)
導入圖片進行識別
// Run OCR on the image file chosen by the user and keep the recognized text in `str`.
string str;

// Image.FromFile keeps the file locked until the Image is disposed, so the source
// image, the working bitmap and the OCR page are all released deterministically,
// even if recognition throws.
using (Image source = Image.FromFile(filename.FileName.ToString()))
using (Bitmap bit = new Bitmap(source))
{
    // If recognition accuracy is low, try running OCR on a preprocessed copy instead:
    //   using (Bitmap cleaned = PreprocesImage(bit))
    //   using (Page page = ocr.Process(cleaned)) { str = page.GetText(); }
    using (Page page = ocr.Process(bit))
    {
        str = page.GetText(); // the recognized text
    }
}
圖片處理算法:如果識別數字時識別率偏低,可以試試下面這個方法
/// <summary>Factor by which the image is enlarged in each dimension before thresholding.</summary>
private const int UpscaleFactor = 5;

/// <summary>
/// Per-channel cut-off: a pixel becomes white when any of its R, G or B values
/// exceeds this number, and black otherwise. Adjust here to tune the result.
/// </summary>
private const int ChannelThreshold = 23;

/// <summary>
/// Enlarges the image and reduces it to pure black and white pixels.
/// </summary>
/// <param name="image">Source bitmap. It is only read; the caller keeps ownership of it.</param>
/// <returns>
/// A new bitmap, <see cref="UpscaleFactor"/> times the source size in each dimension,
/// containing only white and black pixels. The caller is responsible for disposing it.
/// </returns>
/// <exception cref="System.ArgumentNullException"><paramref name="image"/> is null.</exception>
private Bitmap PreprocesImage(Bitmap image)
{
    if (image == null)
    {
        throw new System.ArgumentNullException("image");
    }

    // The enlarged copy is only an intermediate; dispose it once thresholding is done
    // so that repeated calls do not accumulate GDI+ bitmaps.
    using (Bitmap scaled = ResizeImage(image, image.Width * UpscaleFactor, image.Height * UpscaleFactor))
    {
        // Read the dimensions once instead of querying the bitmap on every iteration.
        int width = scaled.Width;
        int height = scaled.Height;
        Bitmap result = new Bitmap(width, height);

        for (int x = 0; x < width; x++)
        {
            for (int y = 0; y < height; y++)
            {
                Color pixel = scaled.GetPixel(x, y);

                // Anything that is not nearly black in every channel is treated as background.
                bool isLight = pixel.R > ChannelThreshold
                    || pixel.G > ChannelThreshold
                    || pixel.B > ChannelThreshold;

                result.SetPixel(x, y, isLight ? Color.White : Color.Black);
            }
        }

        return result;
    }
}

/// <summary>
/// Draws the image into a new bitmap of the requested size using high-quality
/// interpolation settings.
/// </summary>
/// <param name="image">Image to resize. It is only read.</param>
/// <param name="width">Width of the resulting bitmap, in pixels.</param>
/// <param name="height">Height of the resulting bitmap, in pixels.</param>
/// <returns>A new bitmap of the requested size. The caller is responsible for disposing it.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="image"/> is null.</exception>
private Bitmap ResizeImage(Image image, int width, int height)
{
    if (image == null)
    {
        throw new System.ArgumentNullException("image");
    }

    var destRect = new Rectangle(0, 0, width, height);
    var destImage = new Bitmap(width, height);

    // NOTE(review): the vertical resolution is doubled relative to the source; the
    // original author's note "2,3" suggests a hand-tuned multiplier - confirm intent.
    destImage.SetResolution(image.HorizontalResolution, image.VerticalResolution * 2);

    using (var graphics = Graphics.FromImage(destImage))
    {
        graphics.CompositingMode = CompositingMode.SourceOver;
        graphics.CompositingQuality = CompositingQuality.HighQuality;
        graphics.InterpolationMode = InterpolationMode.HighQualityBicubic;
        graphics.SmoothingMode = SmoothingMode.HighQuality;
        graphics.PixelOffsetMode = PixelOffsetMode.HighQuality;

        using (var wrapMode = new ImageAttributes())
        {
            wrapMode.SetWrapMode(WrapMode.Clamp);
            graphics.DrawImage(image, destRect, 0, 0, image.Width, image.Height, GraphicsUnit.Pixel, wrapMode);
        }
    }

    return destImage;
}
轉載:https://blog.csdn.net/HorseRoll/article/details/83310677?utm_medium=distribute.pc_relevant.none-task-blog-BlogCommendFromMachineLearnPai2-1.channel_param&depth_1-utm_source=distribute.pc_relevant.none-task-blog-BlogCommendFromMachineLearnPai2-1.channel_param
Tesseract4配置與示例
https://blog.csdn.net/jumencibaliang92/article/details/82150883