功能介紹:
提取指定文件夾下的所有增值稅發票(格式為jpg或png或pdf(暫時只處理第1頁)),把所有信息寫到Excel表當前選中的單元格,並重命名原始發票(可指定規則)複製到新文件夾。
由於要用到百度的接口,所以需要註冊百度智能雲+實名認證+創建應用+領取資源
- https://console.bce.baidu.com/ai/#/ai/ocr/overview/index (產品服務→人工智能→文字識別)
- 實名認證
- 創建應用→隨便輸入應用名稱→立即創建→查看應用詳情→記錄 appid apikey secretkey
- 領取相應的資源:文字識別→概覽→右側【領取免費資源】→選中【財務票據OCR】→全部→0元領取
- 等待資源到賬:右鍵【資源列表】→已領取資源→核實是否擁有資源
使用步驟:
- 打開任意Excel表,選中第一個要填的單元格
- 讀取腳本說明,修改相應內容後運行即可
下面是AutoHotkey v2 beta版代碼
;注冊百度智能雲+實名認證+創建應用+領取資源(財務)
; https://console.bce.baidu.com/ai/#/ai/ocr/overview/index (產品服務→人工智能→文字識別)
; 實名認證
; 創建應用→隨便輸入應用名稱→立即創建→查看應用詳情→記錄 apikey secretkey
; 領取相應的資源:
; 文字識別→概覽→右側【領取免費資源】→選中【財務票據OCR】→全部→0元領取
; 等待資源到賬:右鍵【資源列表】→已領取資源
;NOTE 搜索 hymodify 修改相應信息
;功能:
; 提取 dn0 文件夾電子發票信息(pdf只提取第1頁),並寫到當前已打開Excel表(從【當前選中單元格】開始寫)
#SingleInstance force

; Main script: OCR every invoice file found under dn0, write one row per file into the
; running Excel instance (starting at the active cell), and optionally copy each file
; into dn1 under a new name derived from the OCR result.
; NOTE(review): `json` (parse/stringify) is not defined in this file; a JSON library
; presumably has to be #Include-d for the script to run -- confirm.

; The script writes into an already-running Excel, so stop early if there is none.
if (!ProcessExist("Excel.exe")) {
    msgbox("請打開Excel並選中第一個要寫入單元格",,0x40000)
    ExitApp
}

if (0) { ;hymodify change 0 to 1 to use the hard-coded folders below instead of the folder pickers
    dn0 := "c:\Users\Administrator\Desktop\11" ;hymodify source (old) invoice folder
    dn1 := "c:\Users\Administrator\Desktop\22" ;hymodify destination (new) folder; renamed copies are written here
    if !DirExist(dn1)
        DirCreate(dn1)
} else {
    dn0 := DirSelect(, 2, "選擇【舊】發票文件夾")
    if (!strlen(dn0))
        ExitApp
    ; An empty result (dialog cancelled) simply disables the copy step further down.
    dn1 := DirSelect(, 2, "選擇【新】發票文件夾")
}

; Column title -> key in the OCR result. Column order in Excel follows this list.
arrOcr := [
    ["發票代碼","InvoiceCode"],
    ["發票號碼","InvoiceNum"],
    ["開票日期","InvoiceDate"],
    ["校驗碼","CheckCode"],
    ["機器編號","MachineCode"],
    ["金額","AmountInFiguers"],
    ["服務名稱1","CommodityName"],
    ["稅率1","CommodityTaxRate"],
    ["稅額1","CommodityTax"],
    ["大寫金額","AmountInWords"],
    ["銷售方名稱","SellerName"],
    ["銷售方納稅人識別號","SellerRegisterNum"],
    ["銷售方地址","SellerAddress"],
    ["銷售方開戶行","SellerBank"],
    ["購買方名稱","PurchaserName"],
    ["購買方納稅人識別號","PurchaserRegisterNum"],
    ["購買方地址","PurchaserAddress"],
    ["購買方開戶行","PurchaserBank"],
]
; Extra columns appended after the OCR columns.
arrOther := [
    "新文件名", ; new file name; depends on the OCR result
    "原文件名", ; original file name
    "序號",     ; running row number
]
csOcr := arrOcr.length
cs := csOcr+arrOther.length

arrA := ComObjArray(12, 1, cs) ; 1 x cs array of VARIANTs, assigned to a whole row at once
xl := ox()
st := xl.ActiveSheet
ac := xl.ActiveCell
r := 0           ; number of data rows written so far
arrError := []   ; names of files whose copy failed

if (ac.row == 1) { ; active cell is in row 1: format the sheet as text and write the header row
    st.cells.NumberFormat := "@"
    for _, arr in arrOcr
        arrA[0,A_Index-1] := arr[1]
    for _, v in arrOther
        arrA[0,csOcr+A_Index-1] := v
    ac.resize(1,cs).value := arrA
    rng1 := ac.offset(1).resize(1,cs) ; first data row sits below the header
} else
    rng1 := ac.resize(1,cs)

loop files, dn0 . "\*.*", "RF" { ;hymodify the "R" option also processes sub-folders
    if (A_LoopFileAttrib ~= "[HS]") ; skip hidden/system files
        continue
    ; Anchored with $ so that only the real extension counts ("a.pdf.bak" is skipped).
    if !(A_LoopFileName ~= "i)\.(pdf|jpg|png)$") ;hymodify file type filter
        continue
    tooltip(A_Index . "`n" . A_LoopFileName)
    objOcr := _Web.baiduOcr_vatInvoice(A_LoopFileFullPath)
    arrA := ComObjArray(12, 1, cs) ; fresh row buffer for every file

    ; OCR columns. baiduOcr_vatInvoice returns an empty Array on API errors; indexing an
    ; Array with a string key would throw, so that case is excluded before any key lookup.
    noExt := ""
    if (isobject(objOcr) && !(objOcr is Array) && objOcr.Has("TotalAmount") && objOcr["TotalAmount"]) {
        for _, arr in arrOcr {
            if !objOcr.Has(arr[2]) ; field absent from the response: leave the cell empty
                continue
            res := objOcr[arr[2]]
            if (isobject(res)) {
                ; List-valued fields: only the first entry is written.
                if (res.length)
                    arrA[0,A_Index-1] := res[1]["word"]
            } else
                arrA[0,A_Index-1] := res
        }
        noExt := format("{1}-{2}", delete0(objOcr["AmountInFiguers"]),objOcr["InvoiceNum"]) ;hymodify new file name rule; default is (amount-invoice number)
        arrA[0,csOcr] := noExt
    }

    ; Columns that do not depend on the OCR result.
    arrA[0,csOcr+1] := A_LoopFileName
    arrA[0,csOcr+2] := r+1

    ; Write the whole row in one assignment.
    rng1.offset(r).value := arrA
    r++

    ; Copy the file. When OCR failed the original name is kept with a "__" prefix.
    ; Both variants are guarded so that one failed copy cannot abort the whole batch.
    if strlen(dn1) {
        SplitPath(A_LoopFileFullPath,,, &ext, &baseName)
        if (strlen(noExt))
            target := format("{1}\{2}.{3}", dn1,noExt,ext)
        else
            target := format("{1}\__{2}.{3}", dn1,baseName,ext)
        try
            FileCopy(A_LoopFileFullPath, target)
        catch
            arrError.push(A_LoopFileName)
    }
}

WinActivate("ahk_id " . st.application.hwnd)
tooltip()
if arrError.length
    msgbox("以下文件復制時出錯了,請核實`n`n" . json.stringify(arrError, 4))
else
    msgbox("已完成",,0x40000)
ExitApp
; Returns the Application object of a running Excel window, or a newly created Excel
; COM instance when no window matching winTitle exists.
;   winTitle - window title/criteria used to locate Excel (default: its main window class).
; Throws an Error if the Application object is still unreachable after all retries.
ox(winTitle:="ahk_class XLMAIN") {
    if !WinExist(winTitle)
        return ComObject("Excel.application")
    ctlID := ControlGetHwnd("EXCEL71") ; worksheet control of the window found by WinExist

    ; IID_IDispatch = {00020400-0000-0000-C000-000000000046}
    IID_IDispatch := buffer(16)
    numput('Int64',0x0000000000020400, 'Int64',0x46000000000000C0, IID_IDispatch)
    ; 0xFFFFFFF0 = OBJID_NATIVEOM: ask the control for its native object model.
    dllcall("oleacc\AccessibleObjectFromWindow", "ptr",ctlID, "uint",0xFFFFFFF0, "ptr",IID_IDispatch, "ptr*",win:=ComValue(9,0), 'HRESULT')

    ; Reading .application can fail while Excel is busy; Escape is sent to the worksheet
    ; control and the access retried. The loop is bounded and paced so that an Excel that
    ; never becomes responsive yields an error instead of an endless busy loop.
    loop 100 {
        try {
            return win.application
        } catch {
            ControlSend("{escape}", "EXCEL71")
            Sleep(50)
        }
    }
    throw Error("Excel did not respond: unable to obtain its Application object", -1)
}
; Strips trailing zeros (and a then-dangling decimal point) from a decimal literal.
;   num - value to normalise; anything that is not of the form [-]digits.digits is
;         returned unchanged.
; Values with 8 or more fractional digits are first rounded to 6 places.
delete0(num) {
    ; Guard: only plain decimal literals are rewritten.
    if !(num ~= "^-?\d+\.\d+$")
        return num
    ; Too many fractional digits is treated as an anomaly and rounded away.
    if (num ~= "\.\d{8,}$")
        num := round(num+0.00000001, 6)
    withoutZeros := RegExReplace(num, "\.\d*?\K0+$")
    return rtrim(withoutZeros, ".")
}
; Small HTTP / Baidu OCR helper collection. All members are static.
; NOTE(review): relies on a `json` object (parse/stringify) that is not defined in this
; file; presumably provided by an #Include-d JSON library -- confirm.
class _Web {
    ; HTTP GET (adapted from the AutoHotkey help example for SysGetIPAddresses).
    ;   url - address to request.
    ; Returns the response text, or an empty string if sending the request failed.
    static get(url) {
        rst := ComObject("WinHttp.WinHttpRequest.5.1")
        rst.open("GET", url)
        try {
            rst.send()
            return rst.ResponseText
        }
        return ""
    }

    ; HTTP POST with a form-urlencoded body (the original author notes it "may not work well").
    ; https://docs.microsoft.com/en-us/windows/win32/winhttp/iwinhttprequest-send
    ;   url      - address to post to.
    ;   postData - Map of field name -> value; values are URL-encoded and joined with "&".
    ;              If it has a "timeout" item, that value is passed to WaitForResponse.
    ;              Anything that is not an object sends an empty body.
    ;   Encoding - when non-empty, the raw response body is decoded with this charset
    ;              through ADODB.Stream instead of using ResponseText.
    ;   headers  - optional enumerable of header name -> value; falsy values are skipped.
    ; Returns the response text.
    static post(url, postData:="", Encoding:="", headers:="") {
        rst := ComObject("WinHttp.WinHttpRequest.5.1")
        rst.open("POST", url)
        if isobject(headers) {
            for k, v in headers {
                if v
                    rst.SetRequestHeader(k, v)
            }
        }
        ; Set after the custom headers, so the body type is always form-urlencoded.
        rst.SetRequestHeader("Content-Type", "application/x-www-form-urlencoded")
        if isobject(postData) { ; values must be URL-encoded
            param := ""
            for k, v in postData {
                if (A_Index == 1)
                    param := format("{1}={2}", k,_Web.UrlEncode(v))
                else
                    param .= format("&{1}={2}", k,_Web.UrlEncode(v))
            }
            rst.send(param)
            ; "timeout" is a Map item, not a property, so it has to be read with [].
            rst.WaitForResponse(postData.has("timeout") ? postData["timeout"] : -1)
        } else {
            rst.send()
        }
        if Encoding && rst.ResponseBody {
            ; Decode the raw bytes: write them as binary, then re-read as text in `Encoding`.
            oADO := ComObject("adodb.stream")
            oADO.Type := 1 ; binary
            oADO.Mode := 3 ; read/write
            oADO.Open()
            oADO.Write(rst.ResponseBody)
            oADO.Position := 0
            oADO.Type := 2 ; text
            oADO.Charset := Encoding
            res := oADO.ReadText()
            oADO.Close()
            return res
        }
        return rst.ResponseText
    }

    ; Setup: register a Baidu AI Cloud account, complete real-name verification, create an app:
    ;   https://console.bce.baidu.com/ai/#/ai/ocr/overview/index (Products -> AI -> OCR)
    ;   Create app -> any name -> create -> app details -> note the apikey and secretkey
    ;   Claim the free resources for the wanted category and wait until they are listed
    ;   under "claimed resources".
    ; Put the apikey / secretkey into the two hymodify lines below.
    ;
    ; Returns the OAuth access token. It is requested once and then reused for the rest of
    ; the run, so processing many files does not cost one token request per file.
    static baiduToken() {
        static cachedToken := ""
        if (cachedToken != "")
            return cachedToken
        apikey := "xxx" ;hymodify
        secretkey := "xxx" ;hymodify
        host := format("https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id={1}&client_secret={2}&", apikey,secretkey)
        res := _Web.get(host)
        obj := json.parse(res)
        return cachedToken := obj['access_token']
    }

    ; VAT invoice OCR. Docs: https://cloud.baidu.com/doc/OCR/s/nk3h7xy2t
    ; Example: _Web.baiduOcr_vatInvoice("c:\Users\Administrator\Desktop\22\1.pdf")
    ;   fp   - file path, or already base64-encoded data; anything longer than 256
    ;          characters is treated as base64 data.
    ;   page - page number to recognise when fp is a pdf file.
    ; Returns the parsed "words_result" of the response. On an API error the response is
    ; shown in a message box and an empty Array is returned.
    static baiduOcr_vatInvoice(fp, page:=1) {
        url := "https://aip.baidubce.com/rest/2.0/ocr/v1/vat_invoice"
        ; Decide once whether fp is a path, so the base64 step and the pdf test agree
        ; for every length (including exactly 256).
        isPath := (strlen(fp) <= 256)
        b64 := isPath ? _Web._toBase64(fp) : fp
        url := format("{1}?access_token={2}", url,_Web.baiduToken())
        if (isPath && fp ~= "i)\.pdf$") {
            params := map(
                "pdf_file" , b64,
                "pdf_file_num" , page,
            )
        } else
            params := map("image" , b64)
        response := _Web.post(url, params, "utf-8")
        obj := json.parse(response)
        if (obj.has("error_code")) {
            msgbox(json.stringify(obj, 4))
            return []
        } else
            return obj["words_result"]
    }

    ; Reads a file and returns its content as a base64 string without line breaks.
    ;   fp - path of the file to encode.
    static _toBase64(fp) {
        buf := FileRead(fp, "raw")
        ; 0x40000001 = CRYPT_STRING_NOCRLF | CRYPT_STRING_BASE64.
        ; First call (null output pointer) only fills nSize with the required size.
        dllcall("crypt32\CryptBinaryToString", "Ptr",buf, "UInt",buf.size, "UInt",0x40000001, "Ptr",0, "uint*",&nSize:=0)
        b64 := buffer(nSize << 1, 0) ; size doubled: two bytes per character
        dllcall("crypt32\CryptBinaryToString", "Ptr",buf, "UInt",buf.size, "UInt",0x40000001, "Ptr",b64, "uint*",&nSize)
        return strget(b64)
    }

    ; Percent-encodes a string for use in a URL or form body.
    ;   str - text to encode.
    ;   enc - encoding used to turn the text into bytes before escaping (default UTF-8).
    ; Unreserved characters (A-Z a-z 0-9 - _ . ~) are passed through unchanged; every other
    ; byte becomes %XX. This keeps large base64 payloads close to their original size
    ; instead of tripling them. Returns "" for empty input.
    static UrlEncode(str, enc:="UTF-8") {
        buff := buffer(strput(str, enc))
        strput(str, buff, enc)
        out := ""
        ; Walk the encoded bytes up to the terminating zero.
        while (code := numget(buff, A_Index - 1, "UChar")) {
            if ((code >= 0x30 && code <= 0x39)      ; 0-9
                || (code >= 0x41 && code <= 0x5A)   ; A-Z
                || (code >= 0x61 && code <= 0x7A)   ; a-z
                || code == 0x2D || code == 0x2E || code == 0x5F || code == 0x7E) ; - . _ ~
                out .= chr(code)
            else
                out .= format("%{:02X}", code)
        }
        return out
    }
}
