功能介绍:
提取指定文件夹下的所有增值税发票(格式为jpg、png或pdf,其中pdf暂时只处理第1页),把识别出的信息从Excel表当前选中的单元格开始逐行写入(每张发票一行),并将原始发票按指定规则重命名后复制到新文件夹。
由于要用到百度的接口,所以需要注册百度智能云+实名认证+创建应用+领取资源
- https://console.bce.baidu.com/ai/#/ai/ocr/overview/index (产品服务→人工智能→文字识别)
- 实名认证
- 创建应用→随便输入应用名称→立即创建→查看应用详情→记录 apikey secretkey(脚本只用到这两项,appid 不需要填写)
- 领取相应的资源:文字识别→概览→右侧【领取免费资源】→选中【财务票据OCR】→全部→0元领取
- 等待资源到账:右键【资源列表】→已领取资源→核实是否拥有资源
使用步骤:
- 打开任意Excel表,选中第一个要填的单元格
- 阅读脚本说明,在脚本中搜索 hymodify,修改相应内容(如 apikey、secretkey)后运行即可
下面是AutoHotkey v2 beta版代码
;注册百度智能云+实名认证+创建应用+领取资源(财务)
; https://console.bce.baidu.com/ai/#/ai/ocr/overview/index (产品服务→人工智能→文字识别)
; 实名认证
; 创建应用→随便输入应用名称→立即创建→查看应用详情→记录 apikey secretkey
; 领取相应的资源:
; 文字识别→概览→右侧【领取免费资源】→选中【财务票据OCR】→全部→0元领取
; 等待资源到账:右键【资源列表】→已领取资源
;NOTE 搜索 hymodify 修改相应信息
;功能:
; 提取 dn0 文件夹电子发票信息(pdf只提取第1页),并写到当前已打开Excel表(从【当前选中单元格】开始写)
#SingleInstance force

; Main script: OCR every invoice file found under dn0, write one row per
; file into the already-open Excel sheet (starting at the active cell) and
; copy each file into dn1 under a new name derived from the OCR result.
; Search for "hymodify" to find the spots meant to be customised.

; Excel must already be running: the rows are written relative to the
; cell the user has selected.
if (!ProcessExist("Excel.exe")) {
    msgbox("请打开Excel并选中第一个要写入单元格",,0x40000)
    ExitApp
}

; Switch the condition to 1 to use hard-coded folders instead of the pickers.
if (0) {
    dn0 := "c:\Users\Administrator\Desktop\11" ;hymodify source invoice folder
    dn1 := "c:\Users\Administrator\Desktop\22" ;hymodify target folder (renamed copies go here)
    if !DirExist(dn1)
        DirCreate(dn1)
} else {
    dn0 := DirSelect(, 2, "选择【旧】发票文件夹")
    if (!strlen(dn0))
        ExitApp
    ; An empty dn1 (dialog cancelled) simply disables the copy step below.
    dn1 := DirSelect(, 2, "选择【新】发票文件夹")
}

; Column title -> key in the OCR result, in output column order.
arrOcr := [
    ["发票代码","InvoiceCode"],
    ["发票号码","InvoiceNum"],
    ["开票日期","InvoiceDate"],
    ["校验码","CheckCode"],
    ["机器编号","MachineCode"],
    ["金额","AmountInFiguers"],
    ["服务名称1","CommodityName"],
    ["税率1","CommodityTaxRate"],
    ["税额1","CommodityTax"],
    ["大写金额","AmountInWords"],
    ["销售方名称","SellerName"],
    ["销售方纳税人识别号","SellerRegisterNum"],
    ["销售方地址","SellerAddress"],
    ["销售方开户行","SellerBank"],
    ["购买方名称","PurchaserName"],
    ["购买方纳税人识别号","PurchaserRegisterNum"],
    ["购买方地址","PurchaserAddress"],
    ["购买方开户行","PurchaserBank"],
]
; Extra columns appended after the OCR columns.
arrOther := [
    "新文件名", ; depends on the OCR result
    "原文件名",
    "序号",
]
csOcr := arrOcr.length
cs := csOcr+arrOther.length

arrA := ComObjArray(12, 1, cs) ; 1 x cs array of VT_VARIANT, written as one row
xl := ox()
st := xl.ActiveSheet
ac := xl.ActiveCell
r := 0              ; number of data rows written so far
arrError := []      ; names of files whose copy failed

if (ac.row == 1) { ; active cell is in row 1: format the sheet as text and write the titles
    st.cells.NumberFormat := "@"
    for _, arr in arrOcr
        arrA[0,A_Index-1] := arr[1]
    for _, v in arrOther
        arrA[0,csOcr+A_Index-1] := v
    ac.resize(1,cs).value := arrA
    rng1 := ac.offset(1).resize(1,cs) ; data starts on the row below the titles
} else
    rng1 := ac.resize(1,cs)

loop files, dn0 . "\*.*", "RF" { ;hymodify the R flag makes the loop recurse into subfolders
    if (A_LoopFileAttrib ~= "[HS]") ; skip hidden / system files
        continue
    ; Anchored with $ so that only the real extension is tested
    ; (e.g. "a.pdf.txt" is no longer accepted).
    if !(A_LoopFileName ~= "i)\.(pdf|jpg|png)$") ;hymodify accepted file formats
        continue
    tooltip(A_Index . "`n" . A_LoopFileName)
    objOcr := _Web.baiduOcr_vatInvoice(A_LoopFileFullPath)
    arrA := ComObjArray(12, 1, cs) ; fresh row buffer for every file

    ; OCR columns. baiduOcr_vatInvoice returns an empty Array on an API
    ; error, and indexing an Array with a string key throws, so the result
    ; type and key are checked before anything is read from it.
    noExt := ""
    if (isobject(objOcr) && !(objOcr is Array) && objOcr.has("TotalAmount") && objOcr["TotalAmount"]) {
        for _, arr in arrOcr {
            if !objOcr.has(arr[2]) ; field absent from this response: leave the cell empty
                continue
            res := objOcr[arr[2]]
            if (isobject(res)) {
                ; list-valued field: only the first entry's "word" is exported
                if (res.length)
                    arrA[0,A_Index-1] := res[1]["word"]
            } else
                arrA[0,A_Index-1] := res
        }
        if (objOcr.has("AmountInFiguers") && objOcr.has("InvoiceNum")) {
            noExt := format("{1}-{2}", delete0(objOcr["AmountInFiguers"]),objOcr["InvoiceNum"]) ;hymodify new file name rule, default is (amount-invoice number)
            arrA[0,csOcr] := noExt
        }
    }

    ; Columns that do not depend on the OCR result.
    arrA[0,csOcr+1] := A_LoopFileName
    arrA[0,csOcr+2] := r+1

    ; Write the whole row, then advance.
    rng1.offset(r).value := arrA
    r++

    ; Copy the file. With an OCR-derived name the copy is "<noExt>.<ext>";
    ; otherwise the original name is kept with a "__" prefix. Both copies
    ; are guarded so that one failure (e.g. the target already exists) is
    ; reported at the end instead of aborting the remaining files.
    if strlen(dn1) {
        SplitPath(A_LoopFileFullPath,,, &ext, &oldNoExt)
        if (strlen(noExt))
            target := format("{1}\{2}.{3}", dn1,noExt,ext)
        else
            target := format("{1}\__{2}.{3}", dn1,oldNoExt,ext)
        try
            FileCopy(A_LoopFileFullPath, target)
        catch
            arrError.push(A_LoopFileName)
    }
}

WinActivate("ahk_id " . st.application.hwnd)
tooltip
if arrError.length
    msgbox("以下文件复制时出错了,请核实`n`n" . json.stringify(arrError, 4))
else
    msgbox("已完成",,0x40000)
ExitApp
; Returns the Excel Application COM object.
;   winTitle - window title/criteria used to find a running Excel window.
; If no matching window exists, a new "Excel.application" object is created.
; Otherwise the object is obtained from the window's "EXCEL71" control via
; AccessibleObjectFromWindow (object id 0xFFFFFFF0). While access to
; .application fails, Escape is sent to that control and the access is
; retried. The retries are bounded so that the script cannot hang forever
; (and keep sending Escape) when Excel never becomes reachable; an Error is
; thrown in that case.
ox(winTitle:="ahk_class XLMAIN") {
    if !WinExist(winTitle)
        return ComObject("Excel.application")
    ; WinExist made the Excel window the "last found" window, which the
    ; Control* calls below rely on.
    ctlID := ControlGetHwnd("EXCEL71")
    ; IID_IDispatch = {00020400-0000-0000-C000-000000000046}
    numput('Int64',0x0000000000020400, 'Int64',0x46000000000000C0, IID_IDispatch:=buffer(16))
    dllcall("oleacc\AccessibleObjectFromWindow", "ptr",ctlID, "uint",0xFFFFFFF0, "ptr",IID_IDispatch, "ptr*",win:=ComValue(9,0), 'HRESULT')
    loop 50 {
        try
            return win.application
        catch {
            ControlSend("{escape}", "EXCEL71")
            sleep(100) ; give Excel time to react before the next attempt
        }
    }
    throw Error("无法连接到Excel(可能正处于编辑状态或有对话框未关闭)")
}
; Strips trailing zeros (and a then-dangling decimal point) from a decimal
; number string such as "100.00" -> "100" or "1.50" -> "1.5".
; Anything that is not of the form [-]digits.digits is returned unchanged.
delete0(num) {
    ; Guard: only plain decimal notation is processed.
    if !(num ~= "^-?\d+\.\d+$")
        return num
    ; Eight or more decimals are treated as an anomaly and rounded to six.
    if (num ~= "\.\d{8,}$")
        num := round(num+0.00000001, 6)
    ; Remove the run of zeros at the end of the fraction, then a bare ".".
    stripped := RegExReplace(num, "\.\d*?\K0+$")
    return rtrim(stripped, ".")
}
; Static helpers for HTTP requests and the Baidu VAT-invoice OCR endpoint.
; Relies on a `json` class (parse / stringify) that is defined elsewhere.
class _Web {

    ; Access token cached by baiduToken() so that it is requested only once
    ; per script run instead of once per invoice.
    static _token := ""

    ; Performs a GET request and returns the response text.
    ; Adapted from the SysGetIPAddresses example in the help file.
    ; Returns "" when sending the request or reading the response fails.
    static get(url) {
        rst := ComObject("WinHttp.WinHttpRequest.5.1")
        rst.open("GET", url)
        try {
            rst.send()
            return rst.ResponseText
        } catch {
            return ""
        }
    }

    ; Performs a POST request and returns the response text.
    ;   url      - target URL
    ;   postData - either a Map of form fields (values are URL-encoded and
    ;              joined as k=v&k=v), or a string that is sent as-is.
    ;              A Map key "timeout" is not sent; it is used as the
    ;              WaitForResponse timeout (-1 when absent).
    ;   Encoding - when set, the response body is decoded with this charset
    ;              through ADODB.Stream instead of using ResponseText
    ;   headers  - optional enumerable of header name -> value; empty values
    ;              are skipped
    ; https://docs.microsoft.com/en-us/windows/win32/winhttp/iwinhttprequest-send
    static post(url, postData:="", Encoding:="", headers:="") {
        rst := ComObject("WinHttp.WinHttpRequest.5.1")
        rst.open("POST", url)
        ; Default content type first, so that a caller-supplied
        ; "Content-Type" header replaces it rather than being overwritten.
        rst.SetRequestHeader("Content-Type", "application/x-www-form-urlencoded")
        if isobject(headers) {
            for k, v in headers {
                if v
                    rst.SetRequestHeader(k, v)
            }
        }
        if isobject(postData) {
            param := ""
            timeout := -1
            for k, v in postData {
                ; "timeout" is a client-side option, not a form field.
                ; It is read with item syntax: postData is a Map, and
                ; property syntax (postData.timeout) would throw.
                if (k == "timeout") {
                    timeout := v
                    continue
                }
                param .= format("{1}{2}={3}", strlen(param) ? "&" : "", k, _Web.UrlEncode(v))
            }
            rst.send(param)
            ; NOTE(review): the request is opened without the async flag, so
            ; send() presumably returns only after the response arrived and
            ; this wait is a formality - confirm before relying on timeout.
            rst.WaitForResponse(timeout)
        } else if strlen(postData) {
            rst.send(postData) ; raw string body
        } else {
            rst.send()
        }
        if Encoding && rst.ResponseBody {
            ; Decode the raw bytes with the requested charset.
            oADO := ComObject("adodb.stream")
            oADO.Type := 1 ; binary
            oADO.Mode := 3 ; read/write
            oADO.Open()
            oADO.Write(rst.ResponseBody)
            oADO.Position := 0
            oADO.Type := 2 ; text
            oADO.Charset := Encoding
            res := oADO.ReadText()
            oADO.Close()
            return res
        }
        return rst.ResponseText
    }

    ; Returns a Baidu access token for the configured application.
    ; Setup (Baidu AI Cloud console, https://console.bce.baidu.com/ai/#/ai/ocr/overview/index):
    ;   register and complete real-name verification, create an application
    ;   and note its apikey / secretkey, then claim the free OCR resources
    ;   and wait until they show up in the resource list.
    ; Put the apikey / secretkey into the two hymodify lines below.
    ; The token is requested once and then served from _Web._token.
    static baiduToken() {
        if strlen(_Web._token)
            return _Web._token
        apikey := "xxx" ;hymodify
        secretkey := "xxx" ;hymodify
        host := format("https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id={1}&client_secret={2}&", apikey,secretkey)
        res := _Web.get(host)
        obj := json.parse(res)
        _Web._token := obj['access_token']
        return _Web._token
    }

    ; Sends one invoice to the Baidu VAT-invoice OCR endpoint.
    ; Documentation: https://cloud.baidu.com/doc/OCR/s/nk3h7xy2t
    ; Example: _Web.baiduOcr_vatInvoice("c:\Users\Administrator\Desktop\22\1.pdf")
    ;   fp   - a file path, or (when longer than 256 characters) data that is
    ;          already base64-encoded
    ;   page - page number, used only when fp is a path ending in ".pdf"
    ; Returns the "words_result" member of the parsed response. When the
    ; response contains "error_code" it is shown in a message box and an
    ; empty Array is returned.
    static baiduOcr_vatInvoice(fp, page:=1) {
        url := "https://aip.baidubce.com/rest/2.0/ocr/v1/vat_invoice"
        ; One shared decision for "is this a path?", so that the base64 step
        ; and the pdf test can no longer disagree at a length of exactly 256.
        isPath := (strlen(fp) <= 256)
        b64 := isPath ? _Web._toBase64(fp) : fp
        url := format("{1}?access_token={2}", url,_Web.baiduToken())
        if (isPath && fp ~= "i)\.pdf$") { ; test the extension, not just a name ending in "pdf"
            params := map(
                "pdf_file" , b64,
                "pdf_file_num" , page,
            )
        } else
            params := map("image" , b64)
        response := _Web.post(url, params, "utf-8")
        obj := json.parse(response)
        if (obj.has("error_code")) {
            msgbox(json.stringify(obj, 4))
            return []
        }
        return obj["words_result"]
    }

    ; Reads the file fp and returns its content as a base64 string.
    ; Flags 0x40000001 = CRYPT_STRING_BASE64 | CRYPT_STRING_NOCRLF.
    static _toBase64(fp) {
        buf := FileRead(fp, "raw")
        ; First call with a null output pointer only queries the required size.
        dllcall("crypt32\CryptBinaryToString", "Ptr",buf, "UInt",buf.size, "UInt",0x40000001, "Ptr",0, "uint*",&nSize:=0)
        b64 := buffer(nSize << 1, 0) ; nSize characters, two bytes each
        dllcall("crypt32\CryptBinaryToString", "Ptr",buf, "UInt",buf.size, "UInt",0x40000001, "Ptr",b64, "uint*",&nSize)
        return strget(b64)
    }

    ; Percent-encodes str for use in a URL / form body.
    ;   str - text to encode (converted to the bytes of encoding enc first)
    ;   enc - encoding name passed to StrPut, UTF-8 by default
    ; ASCII letters, digits and "-", "_", "." are copied unchanged; every
    ; other byte becomes %XX (uppercase hex). Encoding stops at the first
    ; zero byte. Returns "" for empty input.
    static UrlEncode(str, enc:="UTF-8") {
        buff := buffer(strput(str, enc))
        strput(str, buff, enc)
        r := "" ; must be initialised: empty input returns it untouched
        while (code := numget(buff, A_Index - 1, "UChar")) {
            if ((code >= 0x30 && code <= 0x39)      ; 0-9
                || (code >= 0x41 && code <= 0x5A)   ; A-Z
                || (code >= 0x61 && code <= 0x7A)   ; a-z
                || code == 0x2D || code == 0x2E || code == 0x5F) ; - . _
                r .= chr(code)
            else
                r .= format("%{:02X}", code)
        }
        return r
    }
}