用AutoHotkey調用百度ocr接口提取增值稅發票相關字段並寫到Excel


功能介紹:

提取指定文件夾下的所有增值稅發票(格式為jpg、png或pdf,pdf暫時只處理第1頁),把所有信息從Excel表當前選中的單元格開始逐行寫入,並將原始發票按可指定的規則重命名後複製到新文件夾。
由於要用到百度的接口,所以需要先註冊百度智能雲,並完成實名認證、創建應用、領取資源:

  • https://console.bce.baidu.com/ai/#/ai/ocr/overview/index (產品服務→人工智能→文字識別)
  • 實名認證
  • 創建應用→隨便輸入應用名稱→立即創建→查看應用詳情→記錄 appid apikey secretkey
  • 領取相應的資源:文字識別→概覽→右側【領取免費資源】→選中【財務票據OCR】→全部→0元領取
  • 等待資源到賬:右鍵【資源列表】→已領取資源→核實是否擁有資源

使用步驟:

  • 打開任意Excel表,選中第一個要填的單元格
  • 閱讀腳本內的說明(搜索 hymodify),修改相應內容後運行即可

下面是AutoHotkey v2 beta版代碼(腳本調用了 json.parse / json.stringify,但本文未附帶該 json 類,運行前需自行引入一個 AutoHotkey v2 的 JSON 庫):

; Prerequisites: register on Baidu AI Cloud, pass real-name verification, create an app and
; claim the free quota for financial-receipt OCR.
;   https://console.bce.baidu.com/ai/#/ai/ocr/overview/index (Products -> AI -> Text recognition)
;   Real-name verification
;   Create an app -> any name -> create -> view app details -> note the apikey and secretkey
;   Claim the quota:
;       Text recognition -> Overview -> "claim free resources" on the right -> financial receipt OCR -> all -> claim
;       Wait until it shows up under the resource list -> claimed resources
;
; NOTE: search for "hymodify" to find every setting you are expected to edit.
; Purpose:
;   Run OCR on every invoice in folder dn0 (first page only for PDFs) and write one row per file
;   into the Excel sheet that is already open, starting at the currently selected cell.
;   Each source file is also copied into folder dn1 under a name derived from the OCR result.
; NOTE(review): this script calls json.stringify and _Web calls json.parse; no `json` class is
;   defined in this listing, so one must be included from elsewhere.
#SingleInstance force

; The rows go into a running Excel instance, so stop early when there is none.
if (!ProcessExist("Excel.exe")) {
    msgbox("請打開Excel並選中第一個要寫入單元格",,0x40000)
    ExitApp
}

if (0) { ; change to 1 to use the hard-coded folders below instead of the folder pickers
    dn0 := "c:\Users\Administrator\Desktop\11" ;hymodify source invoice folder
    dn1 := "c:\Users\Administrator\Desktop\22" ;hymodify destination folder (renamed copies are placed here)
    if !DirExist(dn1)
        DirCreate(dn1)
} else {
    dn0 := DirSelect(, 2, "選擇【舊】發票文件夾")
    if (!strlen(dn0))
        ExitApp
    ; An empty dn1 (picker cancelled) is tolerated: the copy step below is then skipped.
    dn1 := DirSelect(, 2, "選擇【新】發票文件夾")
}

; Column header -> key in the OCR result, in output column order.
arrOcr := [
    ["發票代碼","InvoiceCode"],
    ["發票號碼","InvoiceNum"],
    ["開票日期","InvoiceDate"],
    ["校驗碼","CheckCode"],
    ["機器編號","MachineCode"],
    ["金額","AmountInFiguers"],
    ["服務名稱1","CommodityName"],
    ["稅率1","CommodityTaxRate"],
    ["稅額1","CommodityTax"],
    ["大寫金額","AmountInWords"],
    ["銷售方名稱","SellerName"],
    ["銷售方納稅人識別號","SellerRegisterNum"],
    ["銷售方地址","SellerAddress"],
    ["銷售方開戶行","SellerBank"],
    ["購買方名稱","PurchaserName"],
    ["購買方納稅人識別號","PurchaserRegisterNum"],
    ["購買方地址","PurchaserAddress"],
    ["購買方開戶行","PurchaserBank"],
]
; Extra columns appended after the OCR columns.
arrOther := [
    "新文件名", ; new file name, only filled when OCR succeeded
    "原文件名",
    "序號",
]

csOcr := arrOcr.length
cs := csOcr+arrOther.length
arrA := ComObjArray(12, 1, cs) ; 1 x cs array of VARIANTs, written to a whole row at once
xl := ox()
st := xl.ActiveSheet
ac := xl.ActiveCell
r := 0 ; number of data rows written so far
arrError := [] ; names of files whose copy failed
if (ac.row == 1) { ; selection is in row 1: format the sheet as text and write the header row first
    st.cells.NumberFormat := "@"
    for _, arr in arrOcr
        arrA[0,A_Index-1] := arr[1]
    for _, v in arrOther
        arrA[0,csOcr+A_Index-1] := v
    ac.resize(1,cs).value := arrA
    rng1 := ac.offset(1).resize(1,cs) ; first data row sits below the header
} else
    rng1 := ac.resize(1,cs)

loop files, dn0 . "\*.*", "RF" { ;hymodify the "R" flag makes the loop descend into subfolders
    if (A_LoopFileAttrib ~= "[HS]")
        continue
    ; Anchored at the end so that names such as "a.pdf.bak" are not treated as invoices.
    if !(A_LoopFileName ~= "i)\.(pdf|jpg|png)$") ;hymodify accepted file extensions
        continue
    tooltip(A_Index . "`n" . A_LoopFileName)
    objOcr := _Web.baiduOcr_vatInvoice(A_LoopFileFullPath)
    arrA := ComObjArray(12, 1, cs) ; fresh row buffer for every file
    noExt := ""
    ; On an API error baiduOcr_vatInvoice returns an empty Array; indexing an Array with a
    ; string key throws, so Arrays are excluded before any key lookup.
    ocrOk := isobject(objOcr) && !(objOcr is Array) && objOcr.has("TotalAmount") && objOcr["TotalAmount"]
    if (ocrOk) {
        for _, arr in arrOcr {
            if !objOcr.has(arr[2]) ; field absent from this response: leave the cell empty
                continue
            res := objOcr[arr[2]]
            if (isobject(res)) {
                ; List-valued fields: only the first entry's "word" is written.
                if (res.length)
                    arrA[0,A_Index-1] := res[1]["word"]
            } else
                arrA[0,A_Index-1] := res
        }
        amount := objOcr.has("AmountInFiguers") ? objOcr["AmountInFiguers"] : ""
        invoiceNum := objOcr.has("InvoiceNum") ? objOcr["InvoiceNum"] : ""
        noExt := format("{1}-{2}", delete0(amount),invoiceNum) ;hymodify new-name rule, default is amount-invoiceNumber
        arrA[0,csOcr] := noExt
    }
    ; Columns that do not depend on OCR.
    arrA[0,csOcr+1] := A_LoopFileName
    arrA[0,csOcr+2] := r+1
    rng1.offset(r).value := arrA
    r++
    ; Copy the file: recognised invoices get the new name, everything else keeps its name with a "__" prefix.
    if strlen(dn1) {
        SplitPath(A_LoopFileFullPath,,, &ext, &stem)
        if (strlen(noExt))
            target := format("{1}\{2}.{3}", dn1,noExt,ext)
        else
            target := format("{1}\__{2}.{3}", dn1,stem,ext)
        ; Both branches record a failed copy instead of aborting the whole batch.
        try
            FileCopy(A_LoopFileFullPath, target)
        catch
            arrError.push(A_LoopFileName)
    }
}
WinActivate("ahk_id " . st.application.hwnd)
tooltip
if arrError.length
    msgbox("以下文件復制時出錯了,請核實`n`n" . json.stringify(arrError, 4))
else
    msgbox("已完成",,0x40000)
ExitApp

; Returns the Excel Application COM object.
;   winTitle - title criteria of the Excel main window (defaults to its window class).
; When no matching window exists a new Excel.Application object is created instead.
; Otherwise the object is obtained from the "EXCEL71" control of the found window through
; oleacc\AccessibleObjectFromWindow.
; Throws an Error when the Application object cannot be obtained after a bounded number of tries.
ox(winTitle:="ahk_class XLMAIN") {
    if !WinExist(winTitle)
        return ComObject("Excel.application")
    ; WinExist made the window the "last found" one, which the control calls below rely on.
    ctlID := ControlGetHwnd("EXCEL71")
    ; The two Int64 values written here form the IID passed to AccessibleObjectFromWindow.
    numput('Int64',0x0000000000020400, 'Int64',0x46000000000000C0, IID_IDispatch:=buffer(16))
    ; 0xFFFFFFF0 is OBJID_NATIVEOM, i.e. ask for the window's native object model.
    dllcall("oleacc\AccessibleObjectFromWindow", "ptr",ctlID, "uint",0xFFFFFFF0, "ptr",IID_IDispatch, "ptr*",win:=ComValue(9,0), 'HRESULT')
    ; Reading .application can fail (presumably while a cell is being edited - TODO confirm);
    ; Escape is sent and the read retried. The retry is bounded and paced so the script cannot
    ; spin forever sending keystrokes.
    loop 50 {
        try
            return win.application
        catch {
            ControlSend("{escape}", "EXCEL71")
            Sleep(100)
        }
    }
    throw Error("Could not obtain the Excel Application object", -1, winTitle)
}

; Strips trailing zeros (and a dangling decimal point) from a plain decimal string.
;   num - value to normalise; anything that is not "optional minus, digits, dot, digits"
;         is returned unchanged.
; Values carrying 8 or more decimals are first rounded to 6 places.
delete0(num) {
    if !(num ~= "^-?\d+\.\d+$")
        return num
    if (num ~= "\.\d{8,}$") ; abnormally long fraction
        num := round(num+0.00000001, 6)
    ; \K keeps the leading part of the fraction and removes only the run of zeros at the end.
    trimmed := RegExReplace(num, "\.\d*?\K0+$")
    return rtrim(trimmed, ".")
}

; Static-only helpers: HTTP GET/POST through WinHttp, plus the Baidu OCR calls built on them.
; NOTE(review): baiduToken and baiduOcr_vatInvoice call json.parse / json.stringify; no `json`
; class is defined in this listing, so one must be included from elsewhere.
class _Web {

    ; Access token cached by baiduToken() so that it is requested only once per run.
    static _token := ""

    ; Synchronous HTTP GET; returns the response text.
    ; Adapted from the help-file example under SysGetIPAddresses.
    ; The bare `try` swallows a failed request, in which case nothing is returned.
    static get(url) {
        rst := ComObject("WinHttp.WinHttpRequest.5.1")
        rst.open("GET", url)
        try {
            rst.send()
            return rst.ResponseText
        }
    }

    ; Synchronous HTTP POST with Content-Type application/x-www-form-urlencoded.
    ;   url      - target address
    ;   postData - an enumerable of key/value pairs (sent as k=v&k=v with percent-encoded values),
    ;              or a ready-made body string; an optional "timeout" item (seconds) is passed to
    ;              WaitForResponse and is not sent as a form field
    ;   Encoding - when non-empty, the response body is decoded with this charset via ADODB.Stream
    ;   headers  - optional enumerable of header name/value pairs; empty values are skipped
    ; Returns the response text.
    ; https://docs.microsoft.com/en-us/windows/win32/winhttp/iwinhttprequest-send
    static post(url, postData:="", Encoding:="", headers:="") {
        rst := ComObject("WinHttp.WinHttpRequest.5.1")
        rst.open("POST", url)
        if isobject(headers) {
            for k, v in headers {
                if v
                    rst.SetRequestHeader(k, v)
            }
        }
        ; Set after the custom headers, so this Content-Type always wins.
        rst.SetRequestHeader("Content-Type", "application/x-www-form-urlencoded")
        if isobject(postData) {
            param := ""
            timeout := -1
            for k, v in postData {
                if (k == "timeout") { ; request option, read as an item rather than a property
                    timeout := v
                    continue
                }
                param .= (strlen(param) ? "&" : "") . format("{1}={2}", k,_Web.UrlEncode(v))
            }
            rst.send(param)
            rst.WaitForResponse(timeout)
        } else if strlen(postData) {
            ; A pre-built body string is sent as is instead of being dropped.
            rst.send(postData)
        } else {
            rst.send()
        }
        if Encoding && rst.ResponseBody {
            ; Re-read the raw bytes as text in the requested charset.
            oADO := ComObject("adodb.stream")
            oADO.Type := 1
            oADO.Mode := 3
            oADO.Open()
            oADO.Write(rst.ResponseBody)
            oADO.Position := 0
            oADO.Type := 2
            oADO.Charset := Encoding
            res := oADO.ReadText()
            oADO.Close()
            return res
        }
        return rst.ResponseText
    }

    ; Requests an access token from Baidu for the apikey/secretkey entered below and returns it.
    ; Setup on Baidu AI Cloud (register, real-name verification, create an app, claim quota):
    ;   https://console.bce.baidu.com/ai/#/ai/ocr/overview/index (Products -> AI -> Text recognition)
    ;   Create an app -> any name -> create -> view app details -> note the apikey and secretkey
    ; The token is cached in _Web._token, so only the first call goes to the network.
    ; Throws an Error when the response carries no access_token.
    static baiduToken() {
        if strlen(_Web._token)
            return _Web._token
        apikey := "xxx" ;hymodify
        secretkey := "xxx" ;hymodify
        host := format("https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id={1}&client_secret={2}&", apikey,secretkey)
        res := _Web.get(host)
        obj := json.parse(res)
        if !(isobject(obj) && obj.has("access_token"))
            throw Error("Baidu token request returned no access_token", -1, res)
        return _Web._token := obj['access_token']
    }

    ; Runs Baidu VAT-invoice OCR on one document.
    ; API documentation: https://cloud.baidu.com/doc/OCR/s/nk3h7xy2t
    ; Example: _Web.baiduOcr_vatInvoice("c:\Users\Administrator\Desktop\22\1.pdf")
    ;   fp   - a file path, or (when longer than 256 characters) already-encoded base64 data
    ;   page - page number to recognise when fp is a path ending in .pdf
    ; Returns the "words_result" item of the parsed response. When the response contains
    ; "error_code" the whole response is shown in a message box and an empty Array is returned.
    ; Base64 input is always submitted as an image, never as a PDF.
    static baiduOcr_vatInvoice(fp, page:=1) {
        url := "https://aip.baidubce.com/rest/2.0/ocr/v1/vat_invoice"
        ; One shared test, so a 256-character path is handled the same way in both places.
        isPath := strlen(fp) <= 256
        b64 := isPath ? _Web._toBase64(fp) : fp
        url := format("{1}?access_token={2}", url,_Web.baiduToken())
        if (isPath && fp ~= "i)\.pdf$") {
            params := map(
                "pdf_file" , b64,
                "pdf_file_num" , page,
            )
        } else
            params := map("image" , b64)
        response := _Web.post(url, params, "utf-8")
        obj := json.parse(response)
        if (obj.has("error_code")) {
            msgbox(json.stringify(obj, 4))
            return []
        } else
            return obj["words_result"]
    }

    ; Reads file fp and returns its content as a base64 string.
    ; Flags 0x40000001 = CRYPT_STRING_BASE64 | CRYPT_STRING_NOCRLF (single line, no line breaks).
    static _toBase64(fp) {
        buf := FileRead(fp, "raw")
        ; First call with a null output pointer only reports the required length in characters.
        dllcall("crypt32\CryptBinaryToString", "Ptr",buf, "UInt",buf.size, "UInt",0x40000001, "Ptr",0, "uint*",&nSize:=0)
        b64 := buffer(nSize << 1, 0) ; two bytes per character
        dllcall("crypt32\CryptBinaryToString", "Ptr",buf, "UInt",buf.size, "UInt",0x40000001, "Ptr",b64, "uint*",&nSize)
        return strget(b64)
    }

    ; Percent-encodes str for use in a URL or form body (original routine credited to 萬年書妖).
    ;   str - text to encode
    ;   enc - encoding used to turn str into bytes
    ; Every byte, including letters and digits, is emitted as %XX; encoding stops at the first
    ; zero byte. An empty input yields an empty string.
    static UrlEncode(str, enc:="UTF-8") {
        buff := buffer(strput(str, enc))
        strput(str, buff, enc)
        r := ""
        loop buff.size {
            code := numget(buff, A_Index - 1, "UChar")
            if !code
                break
            ; Format replaces the former swprintf call, which wrote three characters plus a
            ; terminator into a two-character string.
            r .= format("%{:02X}", code)
        }
        return r
    }
}


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM