首先,本小工具使用 C# WinForms 實現,主要使用了百度智能雲 OCR 文字識別技術,調用其官網接口,很簡單;搭配 NPOI Excel 操作類庫導出識別結果,
並利用 Spire.PDF 類庫把 pdf 格式發票轉換為 png 圖片格式。程序可自動識別圖片、pdf 格式的發票,無論發票是用高拍儀、手機拍照還是掃描件取得,都可以識別。
其他說明:本程序借助百度智能雲API作為基礎的發票識別技術。
發票識別助手共分5個功能模塊,操作相對很簡單,第一步點擊添加發票按鈕,選擇要識別的發票信息。注意說明:目前圖片格式支持jpg、png、bmp,圖片的長和寬要求最短邊大於10px,
最長邊小於2048px;圖像編碼後大小必須小於4M,建議不要超過1M;第二步點擊識別發票按鈕,系統開始識別發票信息,識別完成後,發票信息會自動生成。
介紹一下關鍵的代碼:
一、獲取百度雲API token,這個是官方給的,直接拿過來用就可以了。
/// <summary>
/// Requests an OAuth access token from the Baidu AI Cloud token endpoint,
/// using the API key and secret key read from the application settings.
/// </summary>
public static class AccessToken
{
    // API Key of the Baidu Cloud application that has the required service enabled
    // (app setting "APIKey"). Enabling several services when creating the
    // application is recommended.
    private static readonly String clientId = ConfigurationManager.AppSettings.Get("APIKey");

    // Secret Key of the same Baidu Cloud application (app setting "SecretKey").
    private static readonly String clientSecret = ConfigurationManager.AppSettings.Get("SecretKey");

    // One shared HttpClient for the whole process: creating a new client per
    // call (and never disposing it) leaks sockets.
    private static readonly HttpClient client = new HttpClient();

    /// <summary>
    /// Posts a client_credentials grant to the token endpoint and returns the
    /// <c>access_token</c> field of the JSON response.
    /// </summary>
    /// <returns>
    /// The access token, or <c>null</c> when the response JSON carries no
    /// <c>access_token</c> field.
    /// </returns>
    /// <exception cref="InvalidOperationException">
    /// The response body did not deserialize to a JSON object.
    /// </exception>
    public static String getAccessToken()
    {
        String authHost = "https://aip.baidubce.com/oauth/2.0/token";

        List<KeyValuePair<String, String>> paraList = new List<KeyValuePair<string, string>>
        {
            new KeyValuePair<string, string>("grant_type", "client_credentials"),
            new KeyValuePair<string, string>("client_id", clientId),
            new KeyValuePair<string, string>("client_secret", clientSecret)
        };

        // The method is synchronous by contract, so the async calls are blocked on
        // with .Result (failures therefore still surface as AggregateException).
        using (FormUrlEncodedContent form = new FormUrlEncodedContent(paraList))
        using (HttpResponseMessage response = client.PostAsync(authHost, form).Result)
        {
            String result = response.Content.ReadAsStringAsync().Result;

            AccessTokenInfo tokenInfo = JsonConvert.DeserializeObject<AccessTokenInfo>(result);
            if (tokenInfo == null)
            {
                // Empty or "null" body: fail with a descriptive error instead of a
                // bare NullReferenceException on the next line.
                throw new InvalidOperationException("The token endpoint returned an empty response.");
            }

            return tokenInfo.access_token;
        }
    }
}

/// <summary>
/// Shape of the JSON document returned by the token endpoint; property names
/// match the JSON field names so they bind without attributes.
/// </summary>
public class AccessTokenInfo
{
    public string refresh_token { get; set; }
    public string expires_in { get; set; }
    public string session_key { get; set; }
    public string access_token { get; set; }
    public string scope { get; set; }
    public string session_secret { get; set; }
}
二、增值稅票識別請求過程和參數傳遞,也是官方給的例子,自己按照需求修改一下就可以了。
/// <summary>
/// VAT invoice recognition: posts the image file to the Baidu OCR
/// <c>vat_invoice</c> endpoint and returns the raw response body.
/// </summary>
/// <param name="fileName">Path of the invoice image to upload.</param>
/// <returns>The response body, decoded as UTF-8.</returns>
public static string vatInvoice(string fileName)
{
    string host = "https://aip.baidubce.com/rest/2.0/ocr/v1/vat_invoice?access_token=" + token;
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(host);
    request.Method = "POST";
    request.KeepAlive = true;
    // The body is a URL-encoded form ("image=<...>"), so declare it as such.
    request.ContentType = "application/x-www-form-urlencoded";

    // Base64 of the image, then percent-encoded; the result is pure ASCII.
    string base64 = getFileBase64(fileName);
    byte[] buffer = Encoding.ASCII.GetBytes("image=" + UrlEncode(base64));
    request.ContentLength = buffer.Length;

    // Dispose the request stream, response and reader deterministically so the
    // underlying connection is released even when reading fails.
    using (Stream body = request.GetRequestStream())
    {
        body.Write(buffer, 0, buffer.Length);
    }

    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
    {
        return reader.ReadToEnd();
    }
}

/// <summary>
/// Reads a file and returns its content as a Base64 string.
/// </summary>
/// <param name="fileName">Path of the file to read.</param>
/// <returns>Base64 encoding of the bytes that were read.</returns>
public static String getFileBase64(String fileName)
{
    // FileShare.ReadWrite lets the file be read while another handle has it open.
    using (FileStream filestream = new FileStream(fileName, FileMode.Open, System.IO.FileAccess.Read, FileShare.ReadWrite))
    {
        byte[] arr = new byte[filestream.Length];
        int filled = 0;

        // Stream.Read may return fewer bytes than requested; loop until the
        // buffer is full or the stream ends early.
        while (filled < arr.Length)
        {
            int read = filestream.Read(arr, filled, arr.Length - filled);
            if (read == 0)
            {
                break;
            }
            filled += read;
        }

        return Convert.ToBase64String(arr, 0, filled);
    }
}

/// <summary>
/// Percent-encodes every UTF-8 byte of <paramref name="str"/> as
/// <c>%hh</c> (two lowercase hex digits).
/// </summary>
/// <param name="str">Text to encode.</param>
/// <returns>The encoded text; every input byte becomes exactly three characters.</returns>
public static string UrlEncode(string str)
{
    byte[] byStr = System.Text.Encoding.UTF8.GetBytes(str);
    StringBuilder sb = new StringBuilder(byStr.Length * 3);
    for (int i = 0; i < byStr.Length; i++)
    {
        // "x2" always yields two digits; a plain base-16 conversion would turn
        // bytes below 0x10 into a one-digit (invalid) escape such as "%a".
        sb.Append('%').Append(byStr[i].ToString("x2"));
    }
    return sb.ToString();
}
三、這里的部分是把pdf格式的發票,自動轉換為png格式,提供出百度雲api需要的文件格式。
/// <summary>
/// Builds an <see cref="ImageList"/> of 80x60 thumbnails, one image per file,
/// keyed by the file path.
/// </summary>
/// <param name="files">Paths of the image files to load.</param>
/// <returns>The populated image list.</returns>
private ImageList GetImage(string[] files)
{
    ImageList list = new ImageList();
    // Set the thumbnail size once, up front, instead of on every iteration.
    list.ImageSize = new Size(80, 60);
    for (int i = 0; i < files.Length; i++)
    {
        list.Images.Add(files[i], Image.FromFile(files[i]));
    }
    return list;
}

/// <summary>
/// Lets the user pick invoice files. Every selected PDF is converted through
/// <c>Common.ConvertPDF2Image</c> and its entry in the result is replaced by
/// the value that call returns.
/// </summary>
/// <returns>The selected paths (PDF entries replaced), or <c>null</c> when the dialog is cancelled.</returns>
private string[] GetImages()
{
    // The dialog owns native resources; dispose it when done.
    using (OpenFileDialog ofd = new OpenFileDialog())
    {
        ofd.Multiselect = true; // allow selecting several files
        ofd.InitialDirectory = @"C:\images\"; // initial directory. TODO: default to the user's Pictures folder
        ofd.Title = "請選擇要識別的發票的圖片";
        // The pattern has always accepted *.pdf; the label now says so as well.
        ofd.Filter = "圖片文件(*.jpg *.jpeg *.bmp *.png *.pdf)|*.jpg;*.jpeg;*.bmp;*.png;*.pdf";

        if (ofd.ShowDialog() != DialogResult.OK || ofd.FileNames == null)
        {
            return null;
        }

        string[] files = ofd.FileNames;
        // Convert PDF files to PNG images.
        for (int i = 0; i < files.Length; i++)
        {
            if (!string.Equals(Path.GetExtension(files[i]), ".pdf", StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            string imageName = Path.GetFileNameWithoutExtension(files[i]);
            files[i] = Common.ConvertPDF2Image(files[i], imageName, 0, 1, ImageFormat.Png);
            errMsg.AppendText(DateTime.Now.ToLongTimeString() + " 已將" + imageName + ".pdf自動轉換為png圖片格式\r\n");
        }
        return files;
    }
}

/// <summary>
/// Normalizes a date written with 年/月/日 markers: 年 and 月 become "-",
/// 日 is removed (e.g. "2020年01月05日" becomes "2020-01-05").
/// </summary>
/// <param name="date">Date text to normalize.</param>
/// <returns>The text with the three markers replaced.</returns>
public string fmartDate(string date)
{
    return date.Replace("年", "-")
               .Replace("月", "-")
               .Replace("日", "");
}
四、獲取api返回的數據,輸出到dataGridView中。
/// <summary>
/// Menu handler: sends every file in the list view to <c>vatInvoice</c> and
/// appends one grid row per successfully recognized invoice. Failures are
/// reported through <c>addMessage</c> and the file is skipped.
/// </summary>
private void 識別發票ToolStripMenuItem_Click(object sender, EventArgs e)
{
    if (this.listView1.Items.Count == 0)
    {
        MessageBox.Show("請先選擇要識別的發票!", "消息提示", MessageBoxButtons.OKCancel, MessageBoxIcon.Exclamation);
        return;
    }

    Common.ShowProcessing("", this, (obj) =>
    {
        // The work is marshalled through Invoke (the original author's way of
        // avoiding the thread hang); the processing window closes automatically
        // once this callback finishes.
        this.Invoke(new Action(delegate
        {
            foreach (ListViewItem item in this.listView1.Items)
            {
                try
                {
                    var invoiceInfo = JsonConvert.DeserializeObject<dynamic>(vatInvoice(item.SubItems[0].Name));
                    var items = invoiceInfo.words_result;
                    if (items == null)
                    {
                        if (invoiceInfo.error_code != null)
                        {
                            addMessage(item.SubItems[0].Text + " -->" + apiErrorMessage((string)invoiceInfo.error_code));
                        }
                        continue;
                    }

                    // Compute every value that can throw BEFORE touching the grid,
                    // so a failure never leaves a half-filled row behind.
                    string invoiceDate = fmartDate((string)items.InvoiceDate);
                    var totalAmount = Common.NumberToZero((string)items.TotalAmount);
                    object taxRate = "0";
                    if (Common.IsPropertyExist(items, "CommodityTaxRate"))
                    {
                        if (!Common.IsNullOrEmpty(items.CommodityTaxRate[0].word))
                        {
                            taxRate = Common.NumberToZero((string)items.CommodityTaxRate[0].word.ToString().Replace("%", ""));
                        }
                    }
                    var totalTax = Common.NumberToZero((string)items.TotalTax);
                    string isElectronic = items.InvoiceType.ToString().Contains("電子") ? "是" : "否";

                    // Write the row into the data grid.
                    int index = this.dataGridView1.Rows.Add();
                    DataGridViewCellCollection cells = this.dataGridView1.Rows[index].Cells;
                    cells[0].Value = items.InvoiceType;
                    cells[1].Value = items.InvoiceCode;
                    cells[2].Value = items.InvoiceNum;
                    cells[3].Value = invoiceDate;
                    cells[4].Value = items.SellerName;
                    cells[5].Value = items.SellerRegisterNum;
                    cells[6].Value = items.SellerAddress;
                    cells[7].Value = items.SellerBank;
                    cells[8].Value = totalAmount;
                    cells[9].Value = taxRate;
                    cells[10].Value = totalTax;
                    cells[11].Value = items.AmountInFiguers;
                    cells[12].Value = isElectronic;
                    cells[13].Value = items.PurchaserName;
                    cells[14].Value = "一般計稅";

                    // Let the UI repaint between invoices.
                    Application.DoEvents();
                    addMessage(item.SubItems[0].Text + " 識別完成!");
                }
                catch (Exception err)
                {
                    addMessage(item.SubItems[0].Text + err.Message + " 識別出錯,已跳過!");
                }
            }
        }));
    }, null);
}
五、導出發票明細到 Excel 表格中。
/// <summary>
/// Menu handler: exports the invoice grid to an .xls workbook chosen by the
/// user. Row 0 is a merged title, row 1 the column headers, rows 2+ the data.
/// </summary>
private void 導出發票信息ToolStripMenuItem_Click(object sender, EventArgs e)
{
    if (this.dataGridView1.Rows.Count == 0)
    {
        MessageBox.Show("發票列表信息為空,不能執行導出!", "消息提示", MessageBoxButtons.OKCancel, MessageBoxIcon.Exclamation);
        return;
    }

    string fileName;
    // The dialog owns native resources; read the chosen path, then dispose it.
    using (SaveFileDialog sfd = new SaveFileDialog())
    {
        sfd.Filter = "導出發票Excel(*.xls)|*.xls";
        sfd.FileName = "發票明細 - " + DateTime.Now.ToString("yyyyMMddHHmmss");
        if (sfd.ShowDialog() != DialogResult.OK)
        {
            return;
        }
        fileName = sfd.FileName;
    }

    Common.ShowProcessing("正在導出,請稍候...", this, (obj) =>
    {
        HSSFWorkbook wb = new HSSFWorkbook();
        ISheet sheet = wb.CreateSheet("sheet1");
        int columnCount = dataGridView1.ColumnCount; // number of columns
        for (int i = 0; i < columnCount; i++)
        {
            sheet.SetColumnWidth(i, 15 * 256);
        }

        // Report title, merged across all columns.
        IRow titleRow = sheet.CreateRow(0);
        titleRow.HeightInPoints = 25;
        ICell titleCell = titleRow.CreateCell(0);
        titleCell.SetCellValue("發票信息台賬");
        titleCell.CellStyle = CreateBorderedCellStyle(wb, NPOI.SS.UserModel.HorizontalAlignment.Center, 12, 700);
        sheet.AddMergedRegion(new CellRangeAddress(0, 0, 0, columnCount - 1));

        // Column headers.
        IRow headerRow = sheet.CreateRow(1);
        headerRow.HeightInPoints = 20;
        ICellStyle styleHead = CreateBorderedCellStyle(wb, NPOI.SS.UserModel.HorizontalAlignment.Center, 10, 500);
        for (int i = 0; i < columnCount; i++)
        {
            ICell headerCell = headerRow.CreateCell(i);
            headerCell.SetCellValue(dataGridView1.Columns[i].HeaderText.ToString());
            headerCell.CellStyle = styleHead;
        }

        // Detail rows start at the third sheet row (index 2).
        int rowindex = 2;
        ICellStyle styleBody = CreateBorderedCellStyle(wb, NPOI.SS.UserModel.HorizontalAlignment.Left, 9, 500);
        foreach (DataGridViewRow gridRow in this.dataGridView1.Rows)
        {
            // Skip the empty "new row" placeholder shown when the grid allows adding rows.
            if (gridRow.IsNewRow)
            {
                continue;
            }

            IRow datarow = sheet.CreateRow(rowindex);
            datarow.Height = 300;
            for (int j = 0; j < columnCount; j++)
            {
                // A cell that was never filled has a null Value; export it as
                // empty text instead of throwing.
                object value = gridRow.Cells[j].Value;
                ICell datacell = datarow.CreateCell(j);
                datacell.SetCellValue(value == null ? string.Empty : value.ToString());
                datacell.CellStyle = styleBody;
            }
            rowindex += 1;
        }

        // Serialize to a byte array first, then write those bytes to the file.
        byte[] buf;
        using (MemoryStream stream = new MemoryStream())
        {
            wb.Write(stream);
            buf = stream.ToArray();
        }

        // Save as an Excel file.
        using (FileStream fs = new FileStream(fileName, FileMode.Create, FileAccess.Write))
        {
            fs.Write(buf, 0, buf.Length);
            fs.Flush();
        }

        // Report success only after the file has been closed, so the workbook is
        // not still locked while the message box is open.
        MessageBox.Show("導出 EXECL 成功!", "消息提示");
        addMessage("導出發票信息到Execl表完成!");
    }, null);
}

/// <summary>
/// Creates a vertically centered cell style with thin borders on all four
/// sides and a "微軟雅黑" font of the given size and weight.
/// </summary>
/// <param name="wb">Workbook that owns the style and font.</param>
/// <param name="alignment">Horizontal alignment of the cell text.</param>
/// <param name="fontHeightInPoints">Font size in points.</param>
/// <param name="boldweight">Font weight value assigned to <c>IFont.Boldweight</c>.</param>
/// <returns>The new style, registered in <paramref name="wb"/>.</returns>
private static ICellStyle CreateBorderedCellStyle(HSSFWorkbook wb, NPOI.SS.UserModel.HorizontalAlignment alignment, short fontHeightInPoints, short boldweight)
{
    ICellStyle style = wb.CreateCellStyle();
    style.Alignment = alignment;
    style.VerticalAlignment = NPOI.SS.UserModel.VerticalAlignment.Center;
    style.BorderTop = NPOI.SS.UserModel.BorderStyle.Thin;
    style.BorderRight = NPOI.SS.UserModel.BorderStyle.Thin;
    style.BorderBottom = NPOI.SS.UserModel.BorderStyle.Thin;
    style.BorderLeft = NPOI.SS.UserModel.BorderStyle.Thin;
    style.FillBackgroundColor = HSSFColor.Black.Index;
    style.FillForegroundColor = HSSFColor.White.Index;

    IFont font = wb.CreateFont();
    font.FontName = "微軟雅黑";
    font.FontHeightInPoints = fontHeightInPoints;
    font.Boldweight = boldweight;
    style.SetFont(font); // attach the font to the style
    return style;
}
操作說明如下: