python處理日志文件


python處理日志文件

1 打開日志文件

日志文件的後綴雖然是 .log,但它本質上就是文本文件,按照一般讀取文本文件的方式打開即可:

# A .log file is opened like any other text file; release the handle when done.
log_path = "e:\\data.log"
fp = open(log_path)
fp.close()

應用示例:

# Write every log line to its own text file, named after the line's prefix.
# The context managers close both files even if a write fails part-way.
with open("e:\\data.log") as fp:
    for line in fp:  # stream line by line instead of loading the whole log
        filename = line[:14]    # first 14 characters name the new file
        content = line[14:]     # everything after them is the file content

        with open("e:\\" + filename + ".txt", "w") as fp2:
            # content normally still carries the line's own newline, so the
            # extra "\n" leaves a blank line at the end of the output file.
            fp2.write(content + "\n")
View Code

參考鏈接[1].

2 提取目標信息

日志文件每行字符串由空格分隔。例如對第1個字段(IP、時間等)感興趣,則使用split()方法將每行字符串按空格分割,把得到的第1個子字符串存到列表裡,用於下一步處理。

示例代碼:

#!/usr/bin/python
# -*- coding: UTF-8 -*-
 
txt = "Google#Runoob#Taobao#Facebook"

# maxsplit=1: split only at the first "#", which yields a two-element list.
x = txt.split("#", 1)

# Parenthesised so the same line runs on both Python 2 and Python 3.
print(x)

輸出結果:

['Google', 'Runoob#Taobao#Facebook']

參考鏈接[2].

3 統計分析

在上一步驟中,將感興趣的目標信息存儲到列表中,現使用python統計列表元素出現的次數,參考鏈接[3]提供了很多實現方法[4],本文使用collections[5]中的most_common()方法。

示例:

from collections import Counter
def counter(arr):
    """Return the two most frequent items of *arr* as (item, count) pairs."""
    frequencies = Counter(arr)
    top_two = frequencies.most_common(2)
    return top_two

# Example: counter([1, 2, 2, 1, 2, 3]) returns [(2, 3), (1, 2)]

參考鏈接[3-4-5]

4 后記

完整代碼(待整理):

# -*- coding: utf-8 -*-
"""
Created on Thu Apr 11 08:24:02 2019

@author: Green
"""
#import sys
#import time
from collections import Counter
#import pyExcel
import xlwt
# Collect the first space-separated field of every log line (the script
# treats it as the client IP).
IPlists = []
with open("d:\\aa.log") as fp:
    # Iterate lazily: an earlier check recorded 3593512 lines in this log,
    # so readlines() would hold the whole file in memory at once.
    for line in fp:
        # Only the first field is needed, so stop splitting after it.
        IPlists.append(line.split(" ", 1)[0])

print('Length of IPlists: %d' % len(IPlists))

# (IP, count) pairs ordered from most to least frequent.
IP_CountResult = Counter(IPlists).most_common()
print('Length of IP_CountResult: %d' % len(IP_CountResult))

f = xlwt.Workbook()  # Create workbook
sheet1 = f.add_sheet(u'sheet1', cell_overwrite_ok=True)  # Create sheet

# Header in row 0, then one row per distinct IP starting at row 1.
row0 = [u'IP', u'Count']
for col, title in enumerate(row0):
    sheet1.write(0, col, title)
for row, record in enumerate(IP_CountResult, 1):
    for col, value in enumerate(record):
        sheet1.write(row, col, value)
f.save('d:\\IP_CountResult.xls')  # Save the file


#=====================================
# 測試字符串切片(分割)
# txt = "Google Runoob Taobao Facebook"
# 第二個參數為 1,返回兩個參數列表
# x = txt.split(" ", 1)
# print x[0]
#=====================================

    #filename = line[:14]
    #content = line[14:]
    #with open("e:\\"+filename+".txt","w") as fp2:
    #    fp2.write(content+"\n")
View Code

其他拓展應用,見鏈接[6-9]

另,研究pandas在數據處理、繪圖等方面的應用。

 


 

參考鏈接:

[1]python文件操作--分析系統log並提取有效數據: https://blog.csdn.net/qq_30758629/article/details/80766583

[2]菜鳥教程 - Python split()方法: http://www.runoob.com/python/att-string-split.html

[3]Python統計列表元素出現次數: https://blog.csdn.net/weixin_40604987/article/details/79292493

[4]get_frequency: https://github.com/KARL13YAN/learning/blob/master/get_frequency.py

[5]collections官方文檔: https://docs.python.org/3/library/collections.html


 

[6]python讀取日志 - 周一到周五早上6點半檢查日志中的關鍵字,並發送相應的成功或者失敗短信: https://blog.csdn.net/shirayukixue/article/details/52120110

日志內容如下:  

[16-08-04 06:30:39] Init Outer: StkID:20205 Label:7110 Name:02ͨ Type:3 PriceDigit:4 VolUnit:0 FloatIssued:                                                               0 TotalIssued:                                                               0 LastClose:0 AdvStop:0 DecStop:0
[16-08-04 06:30:39] Init Outer: StkID:20206 Label:9802 Name:982 Type:3 PriceDigit:4 VolUnit:0 FloatIssued:                                                               0 TotalIssued:                                                               0 LastClose:0 AdvStop:0 DecStop:0
[16-08-04 06:30:39] IB Recv DCT_STKSTATIC, Stock Total = 20207 Day=20160804, Ver=1470283608
View Code

配置文件如下:

[MobileNo]
user1 = num1
user2 = num2
user3 = num3

[code_IB]
keys = Stock Total
filepath = /home/level2/ib/datacollect.log
exepath = /home/level2/ib
exefile = dcib.exe
failmsg = IB init fail!
day_of_week = 0-4
hour = 06
minute = 30
View Code

python如下:

#-*- encoding: utf-8 -*-
import re
import sys
import os
import time
import requests
import ConfigParser
import logging
import thread
from logging.handlers import RotatingFileHandler
from apscheduler.schedulers.blocking import BlockingScheduler

# Configure the root logger through logging.basicConfig: records at DEBUG
# level and above are appended to search.log, each with a timestamp, the
# source file name and line number, and the level name.
logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
                    datefmt='%a, %d %b %Y %H:%M:%S',
                    filename='search.log',
                    filemode='a')

# Disabled code kept as an inert string literal: it would attach a
# StreamHandler at INFO level to the root logger.
'''
#定義一個StreamHandler,將INFO級別或更高的日志信息打印到標准錯誤,並將其添加到當前的日志處理對象
console = logging.StreamHandler()
console.setLevel(logging.INFO)
formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s')
console.setFormatter(formatter)
logging.getLogger('').addHandler(console)
'''
# Disabled code kept as an inert string literal: it would attach a
# RotatingFileHandler on search.log to the root logger.
# NOTE(review): the note inside the string mentions 5 backups of 10M each,
# but the arguments are maxBytes=1*1024*1024 and backupCount=2 - confirm
# which was intended before re-enabling.
'''
#定義一個RotatingFileHandler,最多備份5個日志文件,每個日志文件最大10M
Rthandler = RotatingFileHandler('search.log', maxBytes=1*1024*1024,backupCount=2)
Rthandler.setLevel(logging.INFO)
#formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s')
#Rthandler.setFormatter(formatter)
logging.getLogger('').addHandler(Rthandler)
'''

# Read the configuration file.
# The parser and the fallbacks are bound before the try block so that
# `conf`, `mobileno` and `code` always exist; a broken or incomplete
# search.ini is reported in the log instead of being swallowed and
# resurfacing later as a NameError.
conf = ConfigParser.ConfigParser()  # parser shared with getconfig()
mobileno = []  # (option, value) pairs of [MobileNo]; the value is the number
code = []      # all section names found in the file
try:
    conf.read("search.ini")  # read() silently skips a file it cannot open
    mobileno = conf.items("MobileNo")
    code = conf.sections()
except ConfigParser.Error:
    # Leaves `code` empty, so main() finds no section to schedule.
    logging.exception("Failed to load search.ini")

def getconfig(section):
    """Load one section of the configuration into module-level globals.

    Reads the options keys, filepath, exepath, exefile, failmsg,
    day_of_week, hour and minute of *section* from the module-level
    ``conf`` parser, stores them in the globals ``keys``, ``logpath``,
    ``exepath``, ``exefile``, ``failmsg``, ``day_of_week``, ``hour`` and
    ``minute`` (in that order), then prints the eight values on one line.
    """
    global keys, logpath, exepath, exefile, failmsg, day_of_week, hour, minute
    # (global name, option name) pairs, listed in the order they are read.
    bindings = (
        ("keys", "keys"),
        ("logpath", "filepath"),
        ("exepath", "exepath"),
        ("exefile", "exefile"),
        ("failmsg", "failmsg"),
        ("day_of_week", "day_of_week"),
        ("hour", "hour"),
        ("minute", "minute"),
    )
    module_vars = globals()
    for target, option in bindings:
        module_vars[target] = conf.get(section, option)
    print(" ".join((keys, logpath, exepath, exefile,
                    failmsg, day_of_week, hour, minute)))




# Get the parameters (keyword, file name) from the command line.
# Disabled: the function below is kept only as an inert string literal and
# is never defined at runtime.
'''
def getParameters():
    ret = []
    if len(sys.argv) != 3:
        print 'Please input correct parameter,for example:'
        print 'python search.py keyword filepath configpath'
    else:
        for i in range(1,len(sys.argv)):
            print i, sys.argv[i]
            ret.append(sys.argv[i])
        print '+============================================================================+'
        print '  Keyword = %s'%sys.argv[1]
    return ret
'''
def isFileExists(strfile):
    """Return True when *strfile* is the path of an existing regular file."""
    is_regular_file = os.path.isfile(strfile)
    return is_regular_file

# Settings shared by both SMS senders.
_SMS_URL = 'http://***/smsNew/sendMessage.html'
_SMS_CLIENT_ID = '804D0196-6C0D-4CEF-91E1-1BB85E0217DB'
_SMS_TIMEOUT = 10  # seconds; keeps an unresponsive gateway from hanging the job


def _send_sms(message):
    """POST *message* to the SMS gateway once per entry of ``mobileno``.

    ``mobileno`` is the module-level list of (option, value) pairs read from
    the [MobileNo] section; the value of each pair is used as the number.
    A failed request is logged and the remaining numbers are still tried.
    """
    for _, toMobileNo in mobileno:
        payload = {'clientId': _SMS_CLIENT_ID, 'Code': 'GB2312',
                   'toMobileNo': toMobileNo, 'message': message}
        try:
            r = requests.post(_SMS_URL, params=payload, timeout=_SMS_TIMEOUT)
        except requests.RequestException:
            logging.exception('Sending SMS to %s failed', toMobileNo)
            continue
        print('%s %s' % (r.url, r.text))
        print(toMobileNo)


def sendMailOk(timefile):
    """Log the "init success" notice for a matched log line and text it out.

    *timefile* is the part of the log line that precedes the keyword, e.g.
    "[16-08-04 06:30:39] IB Recv DCT_STKSTATIC, ". The message consists of
    the text between the first "[" and the next "]", the third
    whitespace-separated token of *timefile*, and "init success!".

    Raises:
        IndexError: if *timefile* contains no "[" or has fewer than three
            whitespace-separated tokens.
    """
    timestamp = timefile.split('[')[1].split(']')[0]
    source = timefile.split()[2]
    # join() concatenates the list items with a single space between them.
    message = ' '.join([timestamp, source, "init success!"])
    logging.info(message)
    _send_sms(message)


def sendMalFail():
    """Log the global ``failmsg`` once and text it to every configured number."""
    # Logged before sending so the failure is recorded exactly once, even
    # when no recipient is configured.
    logging.error(failmsg)
    _send_sms(failmsg)


def Search(keyword, filename):
    """Look for the first line of *filename* that matches *keyword*.

    *keyword* is used as a regular expression to find the line and as a
    literal separator to cut it. On the first hit the text preceding the
    keyword is handed to sendMailOk(), the process named by the global
    ``exefile`` is stopped with pkill, and True is returned. Returns False,
    after logging, when the file does not exist or no line matches.
    """
    if not isFileExists(filename):
        logging.error('Input filepath is wrong,please check agin!')
        return False

    with open(filename, 'r') as fread:
        for text in fread.readlines():
            if re.search(keyword, text) is None:
                continue
            # Report with everything that precedes the keyword on this line.
            prefix = text.split(keyword)[0]
            sendMailOk(prefix)
            # Stop the process that is writing the log.
            os.system("pkill " + exefile)
            return True
        logging.debug('DC not init ,tye agin!')
        return False

def executeSearch():
    """Restart the collector and wait until its log reports initialisation.

    Runs a shell command that changes to ``exepath``, overwrites
    datacollect.log with the output of /bin/echo and launches ``exefile``
    in the background, then waits 3 seconds. Search(keys, logpath) is tried
    up to five times with a 60-second sleep after each miss; True is
    returned on the first hit. After five misses sendMalFail() is called
    and the search is repeated every 60 seconds until it succeeds, in which
    case the function returns None.
    """
    launch = ("cd %s && /bin/echo > datacollect.log && nohup ./%s > /dev/null 2>&1 &"
              % (exepath, exefile))
    os.system(launch)
    time.sleep(3)

    attempts_left = 5
    while attempts_left > 0:
        if Search(keys, logpath):
            return True
        time.sleep(60)
        attempts_left -= 1

    # Five misses in a row: report the failure, then keep polling.
    sendMalFail()
    while not Search(keys, logpath):
        time.sleep(60)
def cron():
    """Run executeSearch() on the cron schedule held in the module globals.

    Creates a BlockingScheduler, registers executeSearch as a 'cron' job
    using the globals ``day_of_week``, ``hour`` and ``minute``, and starts
    the scheduler.
    """
    sched = BlockingScheduler()
    cron_fields = {'day_of_week': day_of_week, 'hour': hour, 'minute': minute}
    sched.add_job(executeSearch, 'cron', **cron_fields)
    sched.start()



def main():
    """Schedule executeSearch() for the configured monitor sections.

    Walks the section names in the global ``code``. For each name in which
    the pattern 'code' is found, it loads that section with getconfig(),
    prints the loaded settings, and starts a BlockingScheduler holding one
    'cron' job built from ``day_of_week``, ``hour`` and ``minute``.

    NOTE(review): BlockingScheduler.start() presumably does not return while
    the scheduler is running, so any later matching section would never be
    reached - confirm before adding a second section.
    """
    for section in code:
        if not re.search('code', section):
            continue
        getconfig(section)
        print(" ".join([
            "keys=", keys,
            "; logpath=", logpath,
            "; exepath=", exepath,
            "; exefile=", exefile,
            "; failmsg=", failmsg,
            "; day_of_week=", day_of_week,
            "; hour=", hour,
            "; minute=", minute,
        ]))
        job_runner = BlockingScheduler()
        job_runner.add_job(executeSearch, 'cron',
                           day_of_week=day_of_week, hour=hour, minute=minute)
        job_runner.start()


if __name__ == '__main__':
    main()
#    executeSearch()
View Code

[7] python每日一練 - 讀取log文件中的數據,並畫圖表: https://www.cnblogs.com/langzou/p/5986245.html

日志內容大致如下: 

python處理代碼:

import matplotlib.pyplot as plt

rangeUpdateTime = [0.0]

# The context manager closes the log once parsing is done. The handle is
# named log_file rather than input so the builtin input() is not shadowed.
with open('serverlog.txt', 'r') as log_file:
    for line in log_file:
        fields = line.split()
        # Keep the last field of every line that has a standalone "update" token.
        if 'update' in fields:
            rangeUpdateTime.append(float(fields[-1]))

# Two stacked plots of the same series: red dots on top, a line below.
plt.figure('frame time')
plt.subplot(211)
plt.plot(rangeUpdateTime, '.r',)
plt.grid(True)
plt.subplot(212)
plt.plot(rangeUpdateTime)
plt.grid(True)
plt.show()
View Code

結果:

 

 [8]統計一個文件中每個單詞出現的次數,列出出現頻率最多的5個單詞: https://www.jb51.net/article/137735.htm

 [9]Python:string.count()返回str在string里面出現的次數: https://blog.csdn.net/chixujohnny/article/details/50259585

示例代碼:

s = 'this is a new technology,and I want to learn this.'
# Without start/end arguments count() scans the whole string.
occurrences = s.count('this')
print(occurrences)
# Output: 2
View Code

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM