shell監控腳本


序言:

  前幾天一位好友問我:服務器監控怎麼做?你們公司的監控是怎麼做的?有什麼開源的監控軟件推薦?常見的開源監控軟件當然首先推薦ZABBIX,分布式、夠強大,而且很多公司都在用。我問他具體有什麼需求,他說要能監控服務器負載、CPU使用率、內存使用率、硬盤使用率、網卡流量、網站PV/UV統計、異常錯誤日志,一旦超出設定的閾值就發郵件告警。確定需求後,我準備將ZABBIX的一些參考資料發給他看看,末了問了一句:你們有多少台機器?他說就3台,一個在線商城網站。哈哈...就3台機器整毛線ZABBIX,用shell腳本吧。於是用了一天時間給他寫了個初版的shell監控腳本,效果如下圖:

監控項:

  • 15分鐘的系統平均負載
  • CPU使用率
  • 內存使用率
  • 虛擬內存使用率
  • 硬盤使用率
  • 網卡流量
  • 網站PV統計
  • 網站UV統計

郵件發送設置:

  • 每小時監控1次,超出設置的閾值就發送郵件通知。
  • 每天08:00發送郵件,上班後可查看前一天服務器運行情況。

1.監控腳本

 
        
  1 #!/usr/bin/env  bash
  2 #
  3 # Author     : Jack zhao
  4 # Data         : 2017/12/08    
  5 # Description: This script is used to monitor the use of local system resources.
  6 
  7 ###The global variable###
  8 export PATH=$PATH:/usr/kerberos/sbin:/usr/kerberos/bin:/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin:/root/bin
  9 export LANG=C
 10 INITDIR=`pwd`
 11 LOCAL_TIME=`date '+%F %H:%M:%S'`
 12 BEFORE_DAY=`date -d "-1 days" +%F`
 13 WHOLE_TIME=`date '+%H'`
 14 
 15 ###Mailbox configuration###
 16 ACCENT_MAIL="123456@126.com"
 17 
 18 ###The alarm threshold###
 19 LOAD_AVERAGE_ALARM=2
 20 CPU_ALARM=70
 21 MEMORY_ALARM=70
 22 SWAP_ALARM=30
 23 ROOT_PART_ALARM=95
 24 IN_NETWORK_TRAFFIC_ALARM=167772160
 25 OUT_NETWORK_TRAFFIC_ALARM=167772160
 26 
 27 ###Log variable###
 28 ACCESS_LOG_PATH=localhost_access_log.${BEFORE_DAY}.txt
 29 
 30 
 31 ####Timed task###
 32 TIMING=08
 33 
 34 ###Public function: print one report row as "<label> : <value>" on stdout; $1 = label (left-justified, padded to 22 columns), $2 = value (left-justified, padded to 5 columns)###
 35 out_format(){
 36     printf "%-22s %-5s %-5s \n" "$1" ":" "$2"
 37 }
 38 
 39 ###Send an alarm mail###
 40 send_mail(){
 41     python mail.py "$ACCENT_MAIL" "[Alarm]:$1" "`cat $INITDIR/Monitoring.log`"    
 42 }
 43 
 44 ###Get load information 15 minutes###
 45 get_load_average(){
 46     LOAD_AVERAGE=`uptime | awk -F "," '{print$NF}' | sed 's#[[:space:]]##g' `
 47     out_format "Load average"  "$LOAD_AVERAGE" >> $INITDIR/Monitoring.log
 48     return 0
 49 }
 50 
 51 ###Get CPU usage###
 52 get_cpu_usage(){
 53     CPU_FREE=`vmstat 1 5 |sed -n '3,$p' |awk '{x = x + $15} END {print x/5}' |awk -F. '{print $1}'`
 54     CPU_USAGE=$((100 - $CPU_FREE))
 55     out_format "CPU usage"  "${CPU_USAGE}%" >> $INITDIR/Monitoring.log
 56     return 0
 57 }
 58 
 59 ###Get memory usage###
 60 get_memory_usage(){
 61     MEMORY_USED=`free -m | grep -w "buffers\/cache:" | awk '{print$3}'`
 62     MEMORY_TOTAL=`free -m  | grep "^Mem" | awk '{print$2}'`
 63     MEMORY_USAGE=`echo "scale=2;${MEMORY_USED}/${MEMORY_TOTAL}*100;" | bc -l`
 64     out_format "Memory usage"  "${MEMORY_USAGE}%" >> $INITDIR/Monitoring.log
 65     return 0
 66 }
 67 
 68 ###Get Swap usage###
 69 get_swap_usage(){
 70     SWAP_USED=`free -m | grep -w "^Swap" | awk '{print$3}'`
 71     SWAP_TOTAL=`free -m | grep -w "^Swap" | awk '{print$2}'`
 72     SWAP_USAGE=`echo "scale=2;${SWAP_USED}/${SWAP_TOTAL}*100;" | bc -l`
 73     out_format "Swap usage"  "${SWAP_USAGE}%" >> $INITDIR/Monitoring.log
 74     return 0
 75 }
 76 
 77 ###Get disk usage###
 78 get_disk_usage(){
 79     MOUNT_POINT=`df -hP | egrep -wv  '^tmpfs|Filesystem|boot' | awk '{print$NF}'`
 80     for i in `echo $MOUNT_POINT`;do
 81         DISK_USAGE=`df -hP | grep -w "$i" | awk '{print$5}'`
 82         out_format "Disk usage $i"  "${DISK_USAGE}" >> $INITDIR/Monitoring.log
 83     done
 84     ROOT_PART=`df -hP  | grep -w "/" | awk '{print$5}' | awk -F "%" '{print$1}'`
 85     return  0
 86 }
 87 
 88 ###Get network traffic###
 89 get_network_traffic(){
 90     NETWORK_TRAFFIC=`sar -n DEV 1 60|grep Average|grep eth0|awk '{print "Input:",$5*1000*8,"bps","Output:",$6*1000*8,"bps"}'`
 91     out_format "Network traffic"  "${NETWORK_TRAFFIC}" >> $INITDIR/Monitoring.log
 92     return 0
 93 }
 94 
 95 ###Get website PV,UV###
 96 get_website_pu(){
 97     WEBSITE_PV=`grep -v "^127.0.0.1" ${ACCESS_LOG_PATH}  | wc -l`
 98     out_format "Before Day Website PV" "$WEBSITE_PV" >> $INITDIR/Monitoring.log 
 99     WEBSITE_UV=`grep -v "^127.0.0.1" ${ACCESS_LOG_PATH} | awk '{print$1}' | sort | uniq | wc -l`
100     out_format "Before Day Website UV" "$WEBSITE_UV" >> $INITDIR/Monitoring.log 
101 }
102 
###Server information###
# NETWORK_IFACE may be set in the environment to report another interface;
# eth0 remains the default. Only the first IPv4 address is kept so the value
# stays on one line when it is used in a mail subject.
IP=$(ip addr show "${NETWORK_IFACE:-eth0}" | awk '$1 == "inet" { sub(/\/.*/, "", $2); print $2; exit }')
HOSTNAME=$(hostname)
# sudo -n fails at once instead of waiting for a password, which suits an
# unattended cron run; the serial number is the first "Serial Number" field
# after the "System Information" header of dmidecode.
SN=$(sudo -n /usr/sbin/dmidecode | awk -F: '
    /System Information/ { in_system = 1 }
    in_system && /Serial Number/ { sub(/^[ \t]+/, "", $2); print $2; exit }')
# /etc/redhat-release only exists on Red Hat style systems; fall back to
# os-release (read in a subshell so its variables do not leak) and then uname.
if [ -r /etc/redhat-release ]; then
    OS=$(cat /etc/redhat-release)
elif [ -r /etc/os-release ]; then
    OS=$(. /etc/os-release && printf '%s' "$PRETTY_NAME")
else
    OS=$(uname -sr)
fi

###Program entrance###
# The header line truncates Monitoring.log; everything after it appends.
echo "############### Monitoring information Date:$LOCAL_TIME ###############" > "$INITDIR/Monitoring.log"
out_format "IP"  "$IP" >> "$INITDIR/Monitoring.log"
out_format "Hostname"  "$HOSTNAME" >> "$INITDIR/Monitoring.log"
out_format "SN"  "$SN" >> "$INITDIR/Monitoring.log"
out_format "Os"  "$OS" >> "$INITDIR/Monitoring.log"

###Call function###
# Each collector appends its own rows to the report and stores its metric in
# a global variable that the threshold checks read afterwards.
get_load_average
get_cpu_usage
get_memory_usage
get_swap_usage
get_disk_usage
get_network_traffic
get_website_pu
124 
###variable format###
# to_int VALUE
# Prints the integer part of VALUE (plain digits, also for input written in
# scientific notation); empty or non-numeric input prints 0, so the numeric
# tests below never receive an empty operand.
to_int(){
    awk -v v="$1" 'BEGIN { printf "%.0f\n", int(v + 0) }'
}

##Monitoring threshold judgment###
# check_thresholds
# Normalises the collected metrics to integers and sends at most one alarm
# mail, for the first threshold that is reached, in the order: load, CPU,
# memory, swap, root partition, inbound traffic, outbound traffic.
# Globals:   reads LOAD_AVERAGE, CPU_USAGE, MEMORY_USAGE, SWAP_USAGE, ROOT_PART,
#            NETWORK_TRAFFIC, IP and the *_ALARM thresholds; writes
#            LOAD_AVERAGE_FORMAT, MEMORY_USAGE_FORMAT, SWAP_USAGE_FORMAT,
#            IN_NETWORK_TRAFFIC and OUT_NETWORK_TRAFFIC
check_thresholds(){
    local in_bps out_bps _
    LOAD_AVERAGE_FORMAT=$(to_int "$LOAD_AVERAGE")
    MEMORY_USAGE_FORMAT=$(to_int "$MEMORY_USAGE")
    SWAP_USAGE_FORMAT=$(to_int "$SWAP_USAGE")
    # NETWORK_TRAFFIC reads "Input: <n> bps Output: <n> bps": words 2 and 5.
    read -r _ in_bps _ _ out_bps _ <<< "$NETWORK_TRAFFIC"
    IN_NETWORK_TRAFFIC=$(to_int "$in_bps")
    OUT_NETWORK_TRAFFIC=$(to_int "$out_bps")

    if [ "$LOAD_AVERAGE_FORMAT" -ge "$LOAD_AVERAGE_ALARM" ]; then
        send_mail "$IP Load:High load average."
    elif [ "$(to_int "$CPU_USAGE")" -ge "$CPU_ALARM" ]; then
        send_mail "$IP CPU:usage greater than ${CPU_ALARM}%."
    elif [ "$MEMORY_USAGE_FORMAT" -ge "$MEMORY_ALARM" ]; then
        send_mail "$IP Memory:usage greater than ${MEMORY_ALARM}%."
    elif [ "$SWAP_USAGE_FORMAT" -ge "$SWAP_ALARM" ]; then
        send_mail "$IP Swap:usage greater than ${SWAP_ALARM}%."
    elif [ "$(to_int "$ROOT_PART")" -ge "$ROOT_PART_ALARM" ]; then
        send_mail "$IP Root Partition:usage greater than ${ROOT_PART_ALARM}%."
    elif [ "$IN_NETWORK_TRAFFIC" -ge "$IN_NETWORK_TRAFFIC_ALARM" ]; then
        send_mail "$IP Network Traffic:Input Greater than ${IN_NETWORK_TRAFFIC_ALARM}bps."
    elif [ "$OUT_NETWORK_TRAFFIC" -ge "$OUT_NETWORK_TRAFFIC_ALARM" ]; then
        send_mail "$IP Network Traffic:Output Greater than ${OUT_NETWORK_TRAFFIC_ALARM}bps."
    fi
}

check_thresholds
148 
###Record history log###
# Keep every report: the per-run file is appended to the history file before
# it is removed below. Paths are quoted so a directory name with spaces works.
cat "$INITDIR/Monitoring.log" >> "$INITDIR/Monitor_history.log"

###Send a notification email at a time###
# WHOLE_TIME is the current hour and TIMING the configured hour; both are
# two-digit strings, so they are compared as text.
if [ "$WHOLE_TIME" = "$TIMING" ]; then
    python "$INITDIR/mail.py" "$ACCENT_MAIL" "[Notice]:$IP Monitoring information" "$(cat "$INITDIR/Monitoring.log")"
fi

###Delete temporary files###
# Falls back to /tmp when INITDIR is empty so the path never starts at "/".
INITDIR_CONFIRM=${INITDIR:-/tmp}
rm -f -- "$INITDIR_CONFIRM/Monitoring.log"

2.發送郵件腳本

 1 #!/usr/bin/env python
 2 #-*- coding: UTF-8 -*-
 3 import os,sys
 4 reload(sys)
 5 sys.setdefaultencoding('utf8')
 6 import getopt
 7 import smtplib
 8 from email.MIMEText import MIMEText
 9 from email.MIMEMultipart import MIMEMultipart
10 from  subprocess import *
11 
12 def sendqqmail(username,password,mailfrom,mailto,subject,content):
13     gserver = 'smtp.qq.com'
14     gport = 465
15 
16     try:
17         msg = MIMEText(unicode(content).encode('utf-8'))
18         msg['from'] = mailfrom
19         msg['to'] = mailto
20         msg['Reply-To'] = mailfrom
21         msg['Subject'] = subject
22 
23         smtp = smtplib.SMTP_SSL(gserver, gport)
24         smtp.set_debuglevel(0)
25         smtp.ehlo()
26         smtp.login(username,password)
27 
28         smtp.sendmail(mailfrom, mailto, msg.as_string())
29         smtp.close()
30     except Exception,err:
31         print "Send mail failed. Error: %s" % err
32 
33 
34 def main():
35     to=sys.argv[1]
36     subject=sys.argv[2]
37     content=sys.argv[3]
38     sendqqmail('123456','gkhcvklnjyjsbgwic','123456@qq.com',to,subject,content)
39 if __name__ == "__main__":
40     main()

3.readme配置

 1 1.新建一個監控用戶(monitor)
 2     [root@mylab ~]# useradd monitor
 3     [root@mylab ~]# passwd monitor
 4 
 5 2.配置sudo
 6     [root@mylab ~]# cp /etc/sudoers /etc/sudoers.bak.`date +'%F-%T'`
 7     [root@mylab ~]# echo "monitor ALL=(root) NOPASSWD: /usr/sbin/dmidecode" >> /etc/sudoers
 8     [root@mylab ~]# vim /etc/sudoers 
 9     # Defaults    requiretty 
10     將上面行注釋掉
11 3.上傳腳本文件至/home/monitor
12     mail.py #發送郵件腳本
13     monitor.sh #監控腳本
14 
15 4.配置文件說明
16     mail.py #調用第三方郵箱發送郵件
17         以QQ郵件為例修改mail.py文件:(其他平台修改參數即可,一般公司都有郵件接口或者自己內網搭建一台郵件服務器)
18         gserver = 'smtp.qq.com'    
19         gport = 465
20         sendqqmail('123456','gkhcvklnjyjsbgic','123456@qq.com',to,subject,content)
21             QQ郵箱用戶名    IMAP/SMTP第三方登錄授權碼(QQ郵箱設置生成)QQ郵箱
22             
23     monitor.sh #可監控負載、CPU、內存、SWAP、硬盤、網卡流量、網站PV/UV(需要根據需求修改)    
24         ###Mailbox configuration###
25         ACCENT_MAIL="123456@126.com" #告警接受郵箱
26         
27         告警閥值,可根據需要修改
28         ###The alarm threshold### 
 29         LOAD_AVERAGE_ALARM=2 #15分鐘內的平均負載,參考:一般小於機器物理CPU個數
 30         CPU_ALARM=70 #CPU使用率
 31         MEMORY_ALARM=70 #內存使用率
 32         SWAP_ALARM=30 #虛擬內存使用率
 33         ROOT_PART_ALARM=95 #/掛載點硬盤使用率(與monitor.sh中的默認值一致)
 34         IN_NETWORK_TRAFFIC_ALARM=167772160 #網卡接收速率閾值,單位bps,取1分鐘采樣的平均值;167772160bps約為20MB/s
 35         OUT_NETWORK_TRAFFIC_ALARM=167772160 #網卡發送速率閾值,單位bps,取1分鐘采樣的平均值;167772160bps約為20MB/s
36         
37         應用日志路徑
38         ###Log variable###
39         ACCESS_LOG_PATH=localhost_access_log.${BEFORE_DAY}.txt
40         
41     Monitor_history.log #日志歷史記錄
42 
43 5.定時發送郵件
44     ####Timed task###
 45     TIMING=08 #每天08點發送郵件
46     
47     
48 6.添加自動任務
49     [monitor@mylab ~]$ crontab  -e
 50     1 * * * *  /bin/sh  -c  "/home/monitor/monitor.sh >/dev/null 2>&1" #每小時的第1分鐘執行一次(>/dev/null 2>&1 在任何/bin/sh下都可用,&>僅bash支持)

4.寫在最後

目前在測試環境運行了一段時間,待優化后上生產。哈哈....

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM