python運用 - log信息提取（知識：遍歷 | os ）

python運用 - log信息提取（知識：遍歷 | os ）

本文轉載自查看原文 2019-09-18 15:18 342 python

運用到的python知識點：

字典：

python字典的幾種遍歷方式：

1）key值遍歷

1 d = {'a': '1', 'b': '2', 'c': '3'}
2 
3 for k in d:  # iterating the dict itself yields its keys
4     print(k+':'+d[k])
5 print('------------')  
6 
7 for k in d.keys():  # explicit keys() view; visits the same keys
8     print(k+':'+d[k])

key值遍歷

1 a:1
2 b:2
3 c:3
4 ------------
5 a:1
6 b:2
7 c:3

運行結果

for k in d: 與 for k in d.keys(): 完全等價

2）value遍歷

1 d = {'a': '1', 'b': '2', 'c': '3'}
2   
3 for v in d.values():  # values() yields only the values, not the keys
4     print(v)
5 
6 運行結果
7 1
8 2
9 3

value遍歷

3）遍歷字典項

1 d = {'a': '1', 'b': '2', 'c': '3'}
2   
3 for kv in d.items():  # each item is a (key, value) tuple
4     print(kv)
5 
6 運行結果
7 ('a', '1')
8 ('b', '2')
9 ('c', '3')

字典項遍歷

4）字典鍵、值遍歷

 1 d = {'a': '1', 'b': '2', 'c': '3'}
 2   
 3 for (k,v) in d.items():  # unpack each (key, value) tuple, parentheses written out
 4     print(k,':',v)
 5 print('----------')    
 6 for k,v in d.items():  # the same unpacking without the parentheses
 7     print(k,':',v)   
 8 
 9 
10 運行結果
11 a : 1
12 b : 2
13 c : 3
14 ----------
15 a : 1
16 b : 2
17 c : 3

字典鍵、值遍歷

for (k,v) in d.items(): 根據運行結果可見，k，v是否加括號是一樣的

os模塊:

os.getcwd()：查看當前所在路徑。

os.listdir(path):列舉目錄下的所有文件。返回的是列表類型

os.path.abspath(path):返回path的絕對路徑。

os.path.split(path):將路徑分解為(文件夾,文件名)，返回的是元組類型

os.path.join(path1,path2,...):將path進行組合，若其中有絕對路徑，則之前的path將被刪除

os.path.dirname(path):返回path中的文件夾部分，結果不包含'\'

os.path.basename(path):返回path中的文件名

os.path.getmtime(path):文件或文件夾的最后修改時間，從新紀元到最后修改時的秒數

os.path.getatime(path):文件或文件夾的最后訪問時間，從新紀元到訪問時的秒數

os.path.getctime(path):文件或文件夾的創建時間（Windows），從新紀元到創建時的秒數；在Unix上為元數據最后更改的時間

os.path.getsize(path):返回path的大小（字節）；對文件夾返回的不是其內容的總大小（結果依系統而定，可能為0）

os.path.exists(path):文件或文件夾是否存在，返回True 或 False

os.path.isfile(path):該路徑是否為一個文件，返回True 或 False

os.path.isdir(path):該路徑是否為一個文件夾，返回True 或 False

其他：

strip()：去掉字符串首尾的空白字符（空格、換行等）

split()：按分隔符將字符串拆分成列表

slice:切片

 1 def testpath():
 2     print(__file__)#獲取當前文件的全目錄
 3     print(sys.argv[0])#獲取當前文件的全目錄
 4     print(os.curdir)#.
 5     
 6     print(os.getcwd())#獲取當前工作目錄路徑
 7     print(os.path.abspath('.')) #獲取當前工作目錄路徑
 8     print(os.path.abspath(os.curdir)) #獲取當前工作目錄路徑
 9         
10     print(os.path.abspath('test.txt')) #獲取當前目錄文件下的工作目錄路徑
11     
12     print(os.path.abspath('..')) #獲取當前工作的父目錄 ！注意是父目錄路徑
13     #os.chdir(path)
14 
15 if __name__ == '__main__':
16     testpath()
17 
18 
19 '''
20 F:\Android\workspace\Demo\src\Func.py
21 F:\Android\workspace\Demo\src\Func.py
22 .
23 F:\Android\workspace\Demo\src
24 F:\Android\workspace\Demo\src
25 F:\Android\workspace\Demo\src
26 F:\Android\workspace\Demo\src\test.txt
27 F:\Android\workspace\Demo
28 
29 '''

Func.py

-----------------------------------------------------------------------

案例：

log：

從以上大篇幅的log中提取出如下信息：

sn，mode，time 相同的只統計一次，loc統計不重復的位置

content:SN=56636200000686;MODE=0;TIME=2019-9-18 8:58:39;LOC=0-460,0,6338,20935,8,31,0,24-460,0,6338,20935,8,31,0,24-460,0,6338,20935,8,31,0,24;TEST_NUM=248;LOC_ERR=26;CONN_ERR=10;SEND_ERR=2;REC_ERR=0

  1 #coding=utf-8
  2 '''
  3 Created on 2019年9月17日
  4 
  5 @author: yanerfree
  6 '''
  7 import re
  8 import xlrd
  9 from xlutils.copy import copy
 10 import os 
 11 #import shutil
 12 
 13 class recorder():
 14     def __init__(self,sn,mode,dat):
 15         self.sn = sn
 16         self.mode = mode
 17         self.dat = dat#日期
 18         self.loc = []
 19         self.test_num = 0
 20         self.loc_err = 0
 21         self.conn_err = 0
 22         self.send_err = 0
 23         self.rec_err = 0
 24         
 25     
 26     def updateInfo(self,str):
 27         print('------更新條目信息------')
 28         pattern_2 = re.compile('content:SN=(.*?);MODE=(.*?);TIME=(.*?) .*?LOC=(.*?);TEST_NUM=(.*?);LOC_ERR=(.*?);CONN_ERR=(.*?);SEND_ERR=(.*?);REC_ERR=(\d*)' ,re.S)
 29         res = re.findall(pattern_2, str)[0]
 30         print('res:',res)
 31         #將loc不同的收集起來
 32         loclist = res[3].split(',')
 33         loc34 = '['+loclist[2]+','+loclist[3]+']'
 34         print('loc34:',loc34)
 35         if loc34 not in self.loc:
 36             #print('loc34:',loc34)
 37             self.loc.append(loc34)
 38             
 39         if int(res[4]) > self.test_num:#此處必須將字符類型轉換成整數類型進行比較
 40             self.test_num = int(res[4])
 41             self.loc_err = int(res[5])
 42             self.conn_err = int(res[6])
 43             self.send_err = int(res[7])
 44             self.rec_err = int(res[8])
 45             
 46     def setInfo(self,str):   
 47         print('創建新條目：',str)
 48         pattern_2 = re.compile('content:SN=(.*?);MODE=(.*?);TIME=(.*?) .*?LOC=(.*?);TEST_NUM=(.*?);LOC_ERR=(.*?);CONN_ERR=(.*?);SEND_ERR=(.*?);REC_ERR=(\d*)' ,re.S)
 49         res = re.findall(pattern_2, str)[0]
 50         loclist = res[3].split(',')
 51         print('loclist:',loclist)
 52         loc34 = '['+loclist[2]+','+loclist[3]+']'
 53         print('loc34:',loc34)
 54         self.loc= [loc34]
 55         self.test_num = int(res[4])
 56         self.loc_err = int(res[5])
 57         self.conn_err = int(res[6])
 58         self.send_err = int(res[7])
 59         self.rec_err = int(res[8])
 60         
 61 
 62 
 63 def washdata(filepath,pattern):
 64     f = open(filepath,'r',encoding='utf-8')
 65     cls_dic = {}#class對象
 66     
 67     line = f.readline().strip()#去空格、換行
 68     while line:
 69         #print('line:',line)
 70         res=re.findall(pattern,line)
 71         #print('res:',res)
 72         #print('len(res):',len(res))
 73         
 74         if len(res) == 1:
 75             info_list = re.findall(pattern_1,res[0])[0]
 76             print('info_list:',info_list)
 77             sn = info_list[0]
 78             mode = info_list[1]
 79             dat = info_list[2]
 80             flag = 0#該對象是否已經創建，1是已創建，直接更新信息即可
 81             for key,cls in cls_dic.items():
 82                 if cls.dat == dat and cls.sn == sn and cls.mode == mode:
 83                     cls.updateInfo(res[0])
 84                     flag = 1
 85             if flag == 0:
 86                 #新建一個class
 87                 cls_name = 'record'+str(len(cls_dic))
 88                 cls_dic[cls_name] = recorder(sn,mode,dat)  
 89                 cls_dic[cls_name].setInfo(res[0])
 90                     
 91         line = f.readline()
 92         
 93     f.close()   
 94     return cls_dic
 95 
 96 #對單個文件操作
 97 def writetoexcel(cls_dic,savefilename):
 98     print('------將數據寫到excel中------')
 99     #遍歷cls_dic
100     workbook1 =  xlrd.open_workbook(savefilename)
101     rows = workbook1.sheet_by_index(0).nrows
102     workbook2 = copy(workbook1)#拷貝一份原來的excel
103     #根據名字獲取指定sheet頁
104     sheet=workbook2.get_sheet('Sheet1')
105     row = rows
106     print('excel中已有數據%d行'%rows)
107 
108     for k,c in cls_dic.items():
109         sheet.write(row, 0, c.dat)
110         sheet.write(row, 1, c.sn)
111         sheet.write(row, 2, c.mode)
112         sheet.write(row, 3, c.loc)
113         sheet.write(row, 4, c.test_num)
114         sheet.write(row, 5, c.loc_err)
115         sheet.write(row, 6, c.conn_err)
116         sheet.write(row, 7, c.send_err)
117         sheet.write(row, 8, c.rec_err)
118         
119         row += 1
120         
121     workbook2.save(savefilename) 
122       
123 #對多個文件操作
def traverse(filepath, savefilename):
    """Process every ``.txt`` file under *filepath*, recursing into folders.

    Each matching file is parsed with ``washdata`` and its records are
    appended to *savefilename* with ``writetoexcel`` straight away, so the
    workbook is rewritten once per log file.
    """
    for name in os.listdir(filepath):
        tmp_path = os.path.join(filepath, name)
        if os.path.isfile(tmp_path):
            if tmp_path.endswith('.txt'):
                print('需要抓取信息的文件為 :', tmp_path)
                cls_dic = washdata(tmp_path, pattern)
                writetoexcel(cls_dic, savefilename)
        elif os.path.isdir(tmp_path):
            # Only descend into real directories; anything else (for
            # example a broken link) would make os.listdir raise.
            traverse(tmp_path, savefilename)
137                  
138 
savefilename = './result.xls'
# Raw strings keep ``\d`` a regex digit class instead of an invalid string
# escape.  ``pattern`` cuts the whole ``content:`` entry out of a log line;
# ``pattern_1`` captures its individual fields.
pattern = re.compile(r'(content:SN=.*?;REC_ERR=\d*)', re.S)
pattern_1 = re.compile(
    r'content:SN=(.*?);MODE=(.*?);TIME=(.*?) .*?LOC=(.*?);TEST_NUM=(.*?);'
    r'LOC_ERR=(.*?);CONN_ERR=(.*?);SEND_ERR=(.*?);REC_ERR=(\d*)',
    re.S,
)

if __name__ == '__main__':
    # Single-file usage, kept for reference:
    #     cls_dic = washdata(filepath, pattern)
    #     writetoexcel(cls_dic, savefilename)

    # Multi-file usage: walk a folder of logs.
    filepath = r'F:\02_testcase\log_P6_test'
    savefilename = './result.xls'
    traverse(filepath, savefilename)

    print('------End------')
155

extractInfo

上一版存在的問題：sn，mode，time 相同的只統計一次，但是代碼每次遍歷一個文件就創建一個dic保存信息並寫入到excel中，那么同樣的sn，mode，time在多個文件中出現的話，就會存在統計了多次的情況

所以升級版主要為解決上述問題，並引入logging模塊，將log打印到文件中便於查看，控制台打印的log有限

  1 #coding=utf-8
  2 '''
  3 Created on 2019年9月19日
  4 
  5 @author: yanerfree
  6 
  7 該版本對前一版升級，修復不同文件中的sn,mode,dat相同的情況下，收集了多個條目，應該是統計到一個條目中
  8 所以此次通過全局變量dic保存需要的信息，將所有的文件遍歷完之后，再寫入到excel中
  9 
 10 '''
 11 import re
 12 import xlrd
 13 from xlutils.copy import copy
 14 import os 
 15 from log_config import *
 16 #import shutil
 17 
 18 
 19 class recorder():
 20     def __init__(self,sn,mode,dat):
 21         self.sn = sn
 22         self.mode = mode
 23         self.dat = dat#日期
 24         self.loc = []
 25         self.test_num = 0
 26         self.loc_err = 0
 27         self.net_err = 0
 28         self.conn_err = 0
 29         self.send_err = 0
 30         self.rec_err = 0
 31         self.signal = []
 32        
 33     
 34     def updateInfo(self,str):
 35         logger.info('------更新條目信息------')
 36         logger.info('更新條目：%s'%str)
 37         res = re.findall(pattern_1, str)[0]
 38         logger.info('res:')
 39         logger.info(res)
 40         #將loc不同的收集起來
 41         loclist = res[3].split('-')[1].split(',')
 42         loc234 = '['+loclist[1]+','+loclist[2]+','+loclist[3]+']'
 43         #print('loc234:',loc234)
 44         if loc234 not in self.loc:
 45             #print('loc34:',loc34)
 46             self.loc.append(loc234)
 47         if (res[10]+',') not in self.signal:
 48             self.signal.append(res[10]+',')
 49 
 50         if int(res[4]) > self.test_num:#此處必須將字符類型轉換成整數類型進行比較
 51             self.test_num = int(res[4])
 52             self.loc_err = int(res[5])
 53             self.net_err = int(res[6])
 54             self.conn_err = int(res[7])
 55             self.send_err = int(res[8])
 56             self.rec_err = int(res[9])
 57             
 58     def setInfo(self,str):   
 59         logger.info('創建新條目：%s'%str)
 60         res = re.findall(pattern_1, str)[0]
 61         loclist = res[3].split('-')[1].split(',')
 62         loc234 = '['+loclist[1]+','+loclist[2]+','+loclist[3]+']'
 63         logger.info('loc234:%s'%loc234)
 64         self.loc.append(loc234)
 65         
 66         self.test_num = int(res[4])
 67         self.loc_err = int(res[5])
 68         self.net_err = int(res[6])
 69         self.conn_err = int(res[7])
 70         self.send_err = int(res[8])
 71         self.rec_err = int(res[9])
 72         self.signal = [res[10]+',']
 73 
 74 def washdata(filepath,pattern,dic):
 75     f = open(filepath,'r',encoding='utf-8')
 76     cls_dic = dic#class對象
 77     logger.warning(cls_dic)
 78     line = f.readline().strip()#去空格、換行
 79     while line:
 80         #print('line:',line)
 81         res=re.findall(pattern,line)
 82         #print('res:',res)
 83         #print('len(res):',len(res))
 84         
 85         if len(res) == 1:
 86             info_list = re.findall(pattern_1,res[0])[0]
 87             logger.info('info_list:')
 88             logger.info(info_list)
 89             sn = info_list[0]
 90             mode = info_list[1]
 91             dat = info_list[2]
 92             flag = 0#該對象是否已經創建，1是已創建，直接更新信息即可
 93             for key,cls in cls_dic.items():
 94                 if cls.dat == dat and cls.sn == sn and cls.mode == mode:
 95                     cls.updateInfo(res[0])
 96                     flag = 1
 97             if flag == 0:
 98                 #新建一個class
 99                 cls_name = 'record'+str(len(cls_dic))
100                 cls_dic[cls_name] = recorder(sn,mode,dat)  
101                 cls_dic[cls_name].setInfo(res[0])
102                     
103         line = f.readline()
104         
105     f.close()   
106     return cls_dic
107 
108 #對單個文件操作
def writetoexcel(cls_dic, savefilename):
    """Append one row per record in *cls_dic* to the workbook *savefilename*.

    The workbook must already exist.  New rows start after the rows already
    present in the first sheet and are written to the sheet named
    ``'Sheet1'`` of a writable copy, which is then saved over the file.
    """
    logger.info('------將數據寫到excel中------')
    source_book = xlrd.open_workbook(savefilename)
    rows = source_book.sheet_by_index(0).nrows
    writable_book = copy(source_book)  # xlrd workbooks are read-only; write to a copy
    sheet = writable_book.get_sheet('Sheet1')
    logger.info('excel中已有數據%d行'%rows)

    logger.info('cls_dic:%s'%cls_dic)
    # Column order of the output row, by attribute name.
    columns = (
        'dat', 'sn', 'mode', 'loc', 'test_num', 'loc_err',
        'net_err', 'conn_err', 'send_err', 'rec_err', 'signal',
    )
    row = rows
    for record in cls_dic.values():
        for col, attr in enumerate(columns):
            sheet.write(row, col, getattr(record, attr))
        row += 1
    logger.info('向excel中寫入%d條數據'%(row-rows))

    writable_book.save(savefilename)
142       
143 #對多個文件操作，並將需要關注的數據提取到dic中
def traverse(filepath, dic):
    """Collect records from every ``.txt`` file under *filepath* into *dic*.

    Sub-folders are visited recursively.  Nothing is written to Excel here;
    the accumulated dict is returned so the caller can write it once, after
    all files have been read.
    """
    logger.warning('遍歷文件夾，dic：%s'%dic)
    logger.warning('len(dic):%d'%len(dic))
    for name in os.listdir(filepath):
        logger.info(name)
        tmp_path = os.path.join(filepath, name)
        if os.path.isfile(tmp_path):
            if tmp_path.endswith('.txt'):
                logger.info('需要抓取信息的文件為 :%s'%tmp_path)
                logger.warning('更新or新增條目%s'%dic)
                logger.warning('len(dic):%d'%len(dic))
                dic = washdata(tmp_path, pattern, dic)
        elif os.path.isdir(tmp_path):
            # Only descend into real directories; anything else (for
            # example a broken link) would make os.listdir raise.  Keep the
            # returned dict instead of relying on in-place mutation.
            dic = traverse(tmp_path, dic)

    return dic
162     #writetoexcel(cls_dic,savefilename)      
163            
# The log entry gained two fields (NET_ERR and SIGNAL) compared with the
# previous version, whose entry ended at REC_ERR.
# Raw strings keep ``\d`` a regex digit class instead of an invalid string
# escape.  ``pattern`` cuts the whole ``content:`` entry out of a log line;
# ``pattern_1`` captures its individual fields.
pattern = re.compile(r'(content:SN=.*?;SIGNAL=.\d)', re.S)
pattern_1 = re.compile(
    r'content:SN=(.*?);MODE=(.*?);TIME=(.*?) .*?LOC=(.*?);TEST_NUM=(.*?);'
    r'LOC_ERR=(.*?);NET_ERR=(.*?);CONN_ERR=(.*?);SEND_ERR=(.*?);'
    r'REC_ERR=(\d*);SIGNAL=(.\d)',
    re.S,
)

if __name__ == '__main__':
    # Single-file usage, kept for reference:
    #     cls_dic = washdata(filepath, pattern)
    #     writetoexcel(cls_dic, savefilename)

    # Multi-file usage: read every log first, then write the workbook once.
    filepath = r'E:\05_Test\P6\wifi&2G\log'
    savefilename = r'E:\05_Test\P6\wifi&2G\result\20190920_result.xls'
    dic = {}
    cls_dic = traverse(filepath, dic)
    writetoexcel(cls_dic, savefilename)

    logger.info('------End------')

demo_ex

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 python 之 BeautifulSoup標簽查找與信息提取 Python網絡爬蟲與信息提取（二）—— BeautifulSoup Python網絡爬蟲與信息提取 Python網絡爬蟲與信息提取（一） python——beautifulsoup標簽搜索以及信息提取 python——博客園首頁信息提取與分析分享一個電子發票信息提取工具(Python) python網絡爬蟲與信息提取——1.requests庫入門 Python自然語言處理---信息提取（中國大學mooc）Python網絡爬蟲與信息提取

python運用 - log信息提取（知識： 遍歷 | os ）

免責聲明！

python運用 - log信息提取（知識：遍歷 | os ）