准備兩個文件即可
conv.vbs
run.bat
conv.vbs源碼
' Usage: put every file whose encoding should change into one folder, then
' either drag that folder onto this .vbs or run
'     conv.vbs "<folder>" "<target charset>"
' Any argument that is missing on the command line is asked for via InputBox.
Dim fso, fd, f, fdpath, charset, failed

' First argument: the folder that holds the files to convert.
If WScript.Arguments.Length >= 1 Then
    fdpath = WScript.Arguments(0)
Else
    fdpath = InputBox("E:\xunlian\新增加的訓練集", "第一個參數")
    If fdpath = "" Then WScript.Quit
End If

' Second argument: the charset to convert to.
If WScript.Arguments.Length >= 2 Then
    charset = WScript.Arguments(1)
Else
    charset = InputBox("UTF-8", "第二個參數")
    If charset = "" Then WScript.Quit
End If
' "ANSI" is accepted as an alias for GB2312. The alias is applied here, after
' both branches, so it also works when the charset comes from the command line.
If UCase(charset) = "ANSI" Then charset = "GB2312"

Set fso = CreateObject("scripting.filesystemobject")
If Not fso.FolderExists(fdpath) Then
    ' Report a bad path instead of silently doing nothing.
    MsgBox "文件夾不存在: " & fdpath, , "tya提示"
    WScript.Quit 1
End If
Set fd = fso.GetFolder(fdpath)

' Convert file by file. Errors are trapped per file so one bad file does not
' stop the batch, but every failure is remembered and reported at the end.
failed = ""
For Each f In fd.Files
    On Error Resume Next
    convertct f.Path, charset
    If Err.Number <> 0 Then
        failed = failed & vbCrLf & f.Name & " (" & Err.Description & ")"
        Err.Clear
    End If
    On Error GoTo 0
Next

If failed = "" Then
    MsgBox "字符編碼轉換結束",,"tya提示"
Else
    MsgBox "字符編碼轉換結束" & vbCrLf & "以下文件轉換失敗:" & failed, , "tya提示"
End If
' Re-encodes one file in place: reads its text through LoadFile, then writes
' the same text back over the original file using the requested charset.
'   filepath - path of the file to convert (it is overwritten)
'   charset  - ADODB.Stream charset name to write with, e.g. "UTF-8"
' No value is returned; any ADODB error propagates to the caller.
Function convertct(filepath,charset)
    Dim FileContents, savefile
    FileContents = LoadFile(filepath)

    Set savefile = CreateObject("adodb.stream")
    savefile.Type = 2                  ' 1 = binary stream, 2 = text stream
    savefile.Mode = 3                  ' 3 = read/write
    savefile.Open
    savefile.Charset = charset
    savefile.WriteText FileContents    ' Write is for binary data, WriteText for text
    savefile.SaveToFile filepath, 2    ' 2 = create the file or overwrite an existing one
    savefile.Close
    Set savefile = Nothing
End Function
' Returns the full text of the file at Path, decoded with the charset that
' CheckCode detects for that file.
' To force a specific encoding instead, replace the CheckCode call with a
' literal charset name such as "UTF-8", "Unicode" or "GB2312".
Function LoadFile(Path)
    Dim reader
    Set reader = CreateObject("ADODB.Stream")
    With reader
        .Type = 2                       ' text stream
        .Mode = 3                       ' read/write
        .Open
        .Charset = CheckCode(Path)
        .Position = .Size
        .LoadFromFile Path
        LoadFile = .ReadText
        .Close
    End With
    Set reader = Nothing
End Function
' Detects the encoding of a file and returns the ADODB.Stream charset name
' to read it with:
'   "Unicode" - the file starts with the byte pair FF FE
'   "UTF-8"   - the bytes pass is_valid_utf8 (works without a BOM)
'   "GB2312"  - fallback for everything else
'   file - path of the file to inspect
Function CheckCode(file)
    Dim slz, Bin, Codes, hasUtf16Bom

    ' Fetch the first two bytes, but only when the file actually has two:
    ' And does not short-circuit in VBScript, so testing a shorter buffer
    ' with AscB(MidB(...)) would raise a runtime error.
    hasUtf16Bom = False
    Set slz = CreateObject("Adodb.Stream")
    slz.Type = 1                        ' binary stream
    slz.Mode = 3                        ' read/write
    slz.Open
    slz.LoadFromFile file
    If slz.Size >= 2 Then
        Bin = slz.Read(2)
        If AscB(MidB(Bin,1,1)) = &HFF Then
            If AscB(MidB(Bin,2,1)) = &HFE Then hasUtf16Bom = True
        End If
    End If
    slz.Close
    Set slz = Nothing

    ' The BOM is decisive, so it is checked before the full UTF-8 scan;
    ' the scan (which loads the whole file) is skipped for BOM-marked files.
    If hasUtf16Bom Then
        Codes = "Unicode"
    ElseIf is_valid_utf8(read(file)) Then
        Codes = "UTF-8"
    Else
        Codes = "GB2312"
    End If
    CheckCode = Codes
End Function
' Loads a file in binary mode and returns its content as a String holding
' one character per byte (each character code equals the byte value).
'   path - path of the file to load
' Returns "" for an empty file.
Function read(path)
    Dim ado, bytes, a(), i, n
    Set ado = CreateObject("ADODB.Stream")
    ado.Type = 1 : ado.Open             ' 1 = binary stream
    ado.LoadFromFile path
    n = ado.Size
    ' Fetch every byte in a single call instead of one Read(1) per byte.
    If n > 0 Then bytes = ado.Read
    ' Release the stream as soon as the bytes are in memory.
    ado.Close
    Set ado = Nothing

    If n = 0 Then
        read = ""
        Exit Function
    End If

    ' Collect the characters in an array and Join once at the end, which
    ' avoids repeated string concatenation.
    ReDim a(n - 1)
    For i = 1 To n
        a(i - 1) = ChrW(AscB(MidB(bytes, i, 1)))
    Next
    read = Join(a, "")
End Function
' Checks whether a byte sequence is well-formed UTF-8 (works for UTF-8 files
' that have no BOM).
'   input - String with one character per byte, as produced by read();
'           passed ByRef to avoid copying a large string
' Returns True when no malformed sequence is found, False otherwise.
' The regular expression lists the malformed shapes; the input is valid when
' none of them matches.
Function is_valid_utf8(ByRef input) 'ByRef for efficiency
    Dim s, re
    Set re = New Regexp
    ' Lead byte followed by too few continuation bytes (80-BF) or by the end
    ' of the input.
    s = "[\xC0-\xDF]([^\x80-\xBF]|$)"
    s = s & "|[\xE0-\xEF].{0,1}([^\x80-\xBF]|$)"
    s = s & "|[\xF0-\xF7].{0,2}([^\x80-\xBF]|$)"
    s = s & "|[\xF8-\xFB].{0,3}([^\x80-\xBF]|$)"
    s = s & "|[\xFC-\xFD].{0,4}([^\x80-\xBF]|$)"
    s = s & "|[\xFE-\xFE].{0,5}([^\x80-\xBF]|$)"
    ' A continuation byte appearing after a sequence is already complete.
    s = s & "|[\x00-\x7F][\x80-\xBF]"
    s = s & "|[\xC0-\xDF].[\x80-\xBF]"
    s = s & "|[\xE0-\xEF]..[\x80-\xBF]"
    s = s & "|[\xF0-\xF7]...[\x80-\xBF]"
    s = s & "|[\xF8-\xFB]....[\x80-\xBF]"
    s = s & "|[\xFC-\xFD].....[\x80-\xBF]"
    s = s & "|[\xFE-\xFE]......[\x80-\xBF]"
    ' Input that starts with a continuation byte.
    s = s & "|^[\x80-\xBF]"
    ' Byte FF can never occur in UTF-8 and no alternative above matches it,
    ' so it is rejected explicitly.
    s = s & "|\xFF"
    re.Pattern = s
    is_valid_utf8 = (Not re.Test(input))
End Function
run.bat 源碼
rem Usage: conv.vbs "<folder containing the files>" "<target charset>"
conv.vbs "E:\xunlian\新增加的訓練集\博彩" "UTF-8"
bat和vbs放到一起,bat內容如下:
rem conv.vbs "改為txt所在文件夾a的路徑" "改為要轉換成的字符編碼"
conv.vbs "改為txt所在文件夾a的路徑" "UTF-8"
使用實例見run.bat文件內容。請先將其中的路徑改為待轉換的txt文件所在的文件夾,然後雙擊run.bat即可執行批處理。
歡迎下載:鏈接:https://pan.baidu.com/s/1uWySdPa-73ByFoM-CACAdA 密碼:cul0
心得:以上是看到熱心網友分享的,我也用過,感覺還是蠻好用的,支持。
