python 修改文件編碼方式

本文轉載自：查看原文 ｜ 2017-08-22 09:52 ｜ 9813 ｜ Python知識

 1 import chardet
 2 import os
 3 
 4 def strJudgeCode(str):
 5     return chardet.detect(str)
 6 
 7 def readFile(path):
 8     try:
 9         f = open(path, 'r')
10         filecontent = f.read()
11     finally:
12         if f:
13             f.close()
14 
15     return filecontent
16 
def WriteFile(str, path):
    """Write ``str`` to the file at ``path``, replacing any previous content.

    The file is opened in text mode (``'w'``), which truncates it first.

    Raises:
        IOError / OSError: if the file cannot be opened or written.
    """
    # A context manager closes the handle on every exit path and lets a
    # failing open() propagate its own error, instead of an
    # UnboundLocalError from touching an unassigned handle during cleanup.
    with open(path, 'w') as f:
        f.write(str)
24 
def converCode(path):
    """Re-encode the file at ``path`` from UTF-8 to GBK, in place.

    The content is read with ``readFile`` and its encoding is guessed with
    ``strJudgeCode``.  Only when the guess is UTF-8 is the content decoded,
    re-encoded as GBK and written back with ``WriteFile``; any other file is
    left untouched.

    Raises:
        UnicodeError: if the content cannot be decoded as UTF-8 or contains
            characters that GBK cannot represent (nothing is written then).
    """
    file_con = readFile(path)
    result = strJudgeCode(file_con)
    # The detector may report no encoding (None), and the letter case of the
    # reported name should not decide whether a file is converted, so the
    # name is normalised before comparing.
    detected = (result['encoding'] or '').lower()
    if detected != 'utf-8':
        return
    gbk_bytes = file_con.decode('utf-8').encode('gbk')
    WriteFile(gbk_bytes, path)
34 
def listDirFile(dir):
    """Walk ``dir`` recursively and run ``converCode`` on every file.

    Sub-directories are descended into; every other entry has its name
    printed and is then handed to ``converCode``.
    """
    for entry in os.listdir(dir):
        filepath = os.path.join(dir, entry)
        if os.path.isdir(filepath):
            listDirFile(filepath)
            continue
        print(entry)
        converCode(filepath)
44 
if __name__ == '__main__':
    # Convert every file below the TRMD folder of the working directory.
    # The backslash is escaped explicitly: a bare "\T" is an invalid escape
    # sequence that newer Python versions warn about; the path value itself
    # is unchanged.
    listDirFile(u'.\\TRMD')

詳細解釋：







 1 import chardet
 2 import os
 3 
 4 def strJudgeCode(str):
 5     return chardet.detect(str)
 6     '''
 7 chardet.detect()返回字典，其中confidence是檢測精確度，encoding是編碼形式
 8 {'confidence': 0.98999999999999999, 'encoding': 'GB2312'}
 9 （1）網頁編碼判斷：
10 
11 >>> import urllib
12 >>> rawdata = urllib.urlopen('http://www.google.cn/').read()
13 >>> import chardet
14 >>> chardet.detect(rawdata)
15 {'confidence': 0.98999999999999999, 'encoding': 'GB2312'}
16 （2）文件編碼判斷
17 
18 復制代碼
19 import chardet
20 tt=open('c:\\111.txt','rb')
21 ff=tt.readline()
22 #這里試着換成read(5)也可以，但是換成readlines()后報錯
23 enc=chardet.detect(ff)
24 print enc['encoding']
25 tt.close()
26     '''
27 
def readFile(path):
    """Return the whole content of the file at ``path``.

    The file is opened in text mode (``'r'``) and read in a single call.

    Raises:
        IOError / OSError: if the file cannot be opened or read.
    """
    # A context manager closes the handle on every exit path and lets a
    # failing open() propagate its own error, instead of an
    # UnboundLocalError from touching an unassigned handle during cleanup.
    # NOTE(review): mode 'r' is kept to preserve existing behaviour; feeding
    # the result to chardet on Python 3 would need bytes ('rb') - confirm
    # the target Python version before changing it.
    with open(path, 'r') as f:
        return f.read()
37 
def WriteFile(str, path):
    """Write ``str`` to the file at ``path``, replacing any previous content.

    The file is opened in text mode (``'w'``), which truncates it first.

    Raises:
        IOError / OSError: if the file cannot be opened or written.
    """
    # A context manager closes the handle on every exit path and lets a
    # failing open() propagate its own error, instead of an
    # UnboundLocalError from touching an unassigned handle during cleanup.
    with open(path, 'w') as f:
        f.write(str)
45 
def converCode(path):
    """Re-encode the file at ``path`` from UTF-8 to GBK, in place.

    The content is read with ``readFile`` and its encoding is guessed with
    ``strJudgeCode``.  Only when the guess is UTF-8 is the content decoded,
    re-encoded as GBK and written back with ``WriteFile``; any other file is
    left untouched.

    Raises:
        UnicodeError: if the content cannot be decoded as UTF-8 or contains
            characters that GBK cannot represent (nothing is written then).
    """
    file_con = readFile(path)
    result = strJudgeCode(file_con)
    # The detector may report no encoding (None), and the letter case of the
    # reported name should not decide whether a file is converted, so the
    # name is normalised before comparing.
    detected = (result['encoding'] or '').lower()
    if detected != 'utf-8':
        return
    # decode() and encode() perform the decoding and the encoding, e.g.:
    #     u = u'\u4e2d\u6587'        # a text object u
    #     s = u.encode('gb2312')     # encode u as gb2312 -> bytes object s
    #     u1 = s.decode('gb2312')    # decode s as gb2312 -> text object u1
    #     u2 = s.decode('utf-8')     # decoding s as utf-8 instead cannot
    #                                # restore the original text
    a_unicode = file_con.decode('utf-8')
    gbk_bytes = a_unicode.encode('gbk')
    WriteFile(gbk_bytes, path)
62 
def listDirFile(dir):
    """Walk ``dir`` recursively and run ``converCode`` on every file.

    Sub-directories are descended into; every other entry has its name
    printed and is then handed to ``converCode``.
    """
    # os.listdir() returns the names of the files and folders found in the
    # given path.
    for entry in os.listdir(dir):
        # os.path.join() is used to build paths.  For example,
        #     os.path.join("home", "me", "mywork")
        # returns "home/me/mywork" on Linux and "home\me\mywork" on Windows.
        # The benefit is that the correct separator ("/" or "\") is picked
        # automatically for the current system.
        filepath = os.path.join(dir, entry)
        # os.path.isdir() tells whether a path is a directory.
        if os.path.isdir(filepath):
            listDirFile(filepath)
        else:
            print(entry)
            converCode(filepath)
81 
if __name__ == '__main__':
    # u'string' marks 'string' as a string that is already Unicode.
    # A "# -*- coding: UTF-8 -*-" line tells Python that the program text is
    # UTF-8 encoded, so that the source can be read as UTF-8.
    # Prefixing Chinese text with u tells Python that what follows is
    # Unicode and should be stored as Unicode.
    #
    # The backslash is escaped explicitly: a bare "\T" is an invalid escape
    # sequence that newer Python versions warn about; the path value itself
    # is unchanged.
    listDirFile(u'.\\TRMD')

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯繫本站郵箱yoyou2525@163.com刪除。

猜您在找：python批量修改文件內容及文件編碼方式的處理；修改python默認的編碼方式；python修改文件的方式；eclipse實現批量修改文件的編碼方式；修改mysql編碼方式；批量修改文件編碼；IDEA 修改文件編碼；IDEA文件編碼修改；批量修改文件編碼；修改文件編碼