shell监控脚本


序言:

  前几天一好友问我服务器监控怎么做?你们公司的监控是怎么做的?有什么开源的监控软件推荐?常见的开源的监控软件当然首先推荐ZABBIX,分布式够强大,而且很多公司都在用,我问他具体什么需求,能监控服务器负载、CPU使用率、内存使用率、硬盘使用率、网卡流量监控、网站PV,UV统计、异常错误日志,一旦超出设定的阈值就发邮件告警。确定需求后准备将ZABBIX的一些参考资料发给他看看,末了问了一句,你们有多少台机器,他说就3台,一个在线商城网站,哈哈...就3台机器整毛线ZABBIX,用shell脚本吧,用了一天时间给写了个初版的shell监控脚本,效果如下图:

监控项:

  • 15分钟的系统平均负载
  • CPU使用率
  • 内存使用率
  • 虚拟内存使用率
  • 硬盘使用率
  • 网卡流量
  • 网站PV统计
  • 网站UV统计

邮件发送设置:

  • 每小时监控1次,超出设置的阈值就发送邮件通知。
  • 每天08:00发送邮件,上班后可查看前一天服务器运行情况。

1.监控脚本

 
  1 #!/usr/bin/env  bash
  2 #
  3 # Author     : Jack zhao
  4 # Date         : 2017/12/08
  5 # Description: This script is used to monitor the use of local system resources.
  6 
  7 ###Global variables###
  8 export PATH=$PATH:/usr/kerberos/sbin:/usr/kerberos/bin:/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin:/root/bin
  9 export LANG=C  # fixed locale: the parsers below match English labels such as "Average" and "Filesystem"
 10 INITDIR=`pwd`  # directory the script is started from; Monitoring.log and Monitor_history.log are written here
 11 LOCAL_TIME=`date '+%F %H:%M:%S'`  # timestamp printed in the report header
 12 BEFORE_DAY=`date -d "-1 days" +%F`  # yesterday's date, used in the access-log file name
 13 WHOLE_TIME=`date '+%H'`  # current hour, compared with TIMING at the end of the script
 14 
 15 ###Mailbox configuration: recipient of the alarm and notice mails###
 16 ACCENT_MAIL="123456@126.com"
 17 
 18 ###Alarm thresholds (an alarm fires when the measured value is greater than or equal to the threshold)###
 19 LOAD_AVERAGE_ALARM=2  # compared with the integer part of the 15-minute load average
 20 CPU_ALARM=70  # CPU usage, percent
 21 MEMORY_ALARM=70  # memory usage, percent (integer part)
 22 SWAP_ALARM=30  # swap usage, percent (integer part)
 23 ROOT_PART_ALARM=95  # usage of the "/" file system, percent
 24 IN_NETWORK_TRAFFIC_ALARM=167772160  # compared with the "Input: N bps" figure derived from sar
 25 OUT_NETWORK_TRAFFIC_ALARM=167772160  # compared with the "Output: N bps" figure derived from sar
 26 
 27 ###Log variable: yesterday's access log, a relative path resolved against the current directory###
 28 ACCESS_LOG_PATH=localhost_access_log.${BEFORE_DAY}.txt
 29 
 30 
 31 ####Timed task: hour of the day (two digits) at which the full report is mailed###
 32 TIMING=08
 33 
 34 ###Public function###
 # Print one aligned "label : value" report row on stdout.
 #   $1 - label, left-justified in a 22-character column
 #   $2 - value, left-justified in a 5-character column (longer values are
 #        printed in full, not truncated)
 out_format() {
     local label=$1
     local value=$2
     printf '%-22s %-5s %-5s \n' "${label}" ':' "${value}"
 }
 38 
 39 ###Send an alarm mail###
 # Mail the current report with an "[Alarm]:" subject via mail.py.
 #   $1 - alarm description appended to the subject
 # Globals read: ACCENT_MAIL (recipient), INITDIR (directory holding
 # Monitoring.log).
 # The report path is quoted so a working directory containing whitespace
 # no longer splits into several arguments to cat.
 send_mail() {
     local report
     report=$(cat "$INITDIR/Monitoring.log")
     python mail.py "$ACCENT_MAIL" "[Alarm]:$1" "$report"
 }
 43 
 44 ###Get load information 15 minutes###
 # Record the 15-minute load average.
 # Takes the last comma-separated field of `uptime`, strips all whitespace,
 # stores it in the global LOAD_AVERAGE and appends a "Load average" row to
 # $INITDIR/Monitoring.log.
 # The redirect target is quoted so a working directory containing spaces
 # does not fail with "ambiguous redirect".
 # Returns: 0 always.
 get_load_average() {
     LOAD_AVERAGE=$(uptime | awk -F ',' '{ gsub(/[[:space:]]/, "", $NF); print $NF }')
     out_format "Load average" "$LOAD_AVERAGE" >> "$INITDIR/Monitoring.log"
     return 0
 }
 50 
 51 ###Get CPU usage###
 # Record CPU usage as 100 minus the average idle percentage from `vmstat 1 5`.
 # Changes from the earlier version:
 #   * the first data row (averages since boot, per vmstat's man page) is
 #     skipped, so only the live one-second samples are averaged;
 #   * the idle column is located through the "id" header label, falling back
 #     to column 15 when the label is not found;
 #   * the sum is divided by the number of rows actually read, not a fixed 5;
 #   * when vmstat yields no samples, CPU_USAGE is left empty and "N/A" is
 #     logged, where previously an empty input became 0% idle (100% usage).
 # Globals written: CPU_FREE (integer idle %), CPU_USAGE (integer usage %).
 # Returns: 0 always.
 get_cpu_usage() {
     CPU_FREE=$(vmstat 1 5 | awk '
         NR == 2 { for (i = 1; i <= NF; i++) if ($i == "id") col = i; if (!col) col = 15; next }
         NR >= 4 { sum += $col; n++ }
         END { if (n) print int(sum / n) }')
     if [ -n "$CPU_FREE" ]; then
         CPU_USAGE=$((100 - CPU_FREE))
         out_format "CPU usage" "${CPU_USAGE}%" >> "$INITDIR/Monitoring.log"
     else
         CPU_USAGE=""
         out_format "CPU usage" "N/A" >> "$INITDIR/Monitoring.log"
     fi
     return 0
 }
 58 
 59 ###Get memory usage###
 # Record memory usage as a percentage of total RAM, based on `free -m`.
 # Two output layouts are handled:
 #   * with a "-/+ buffers/cache:" row, its "used" column is taken;
 #   * without that row, used is computed as total minus the 7th column of
 #     the "Mem:" row ("available" in the newer layout); if the row has
 #     fewer than 7 fields the plain "used" column is taken.
 # The earlier version only understood the first layout, so on a `free`
 # without the "-/+ buffers/cache:" row it fed an empty operand to bc.
 # The percentage is truncated (not rounded) to two decimals; a missing or
 # zero total yields "0.00" instead of a division error.
 # Globals written: MEMORY_TOTAL, MEMORY_USED (MiB), MEMORY_USAGE (e.g. "25.00").
 # Returns: 0 always.
 get_memory_usage() {
     local free_out
     free_out=$(free -m)
     MEMORY_TOTAL=$(awk '/^Mem/ { print $2; exit }' <<< "$free_out")
     MEMORY_USED=$(awk '$2 == "buffers/cache:" { print $3; exit }' <<< "$free_out")
     if [ -z "$MEMORY_USED" ]; then
         MEMORY_USED=$(awk '/^Mem/ { print (NF >= 7 ? $2 - $7 : $3); exit }' <<< "$free_out")
     fi
     MEMORY_USAGE=$(awk -v used="${MEMORY_USED:-0}" -v total="${MEMORY_TOTAL:-0}" \
         'BEGIN { if (total > 0) printf "%.2f", int(used * 10000 / total) / 100; else printf "0.00" }')
     out_format "Memory usage" "${MEMORY_USAGE}%" >> "$INITDIR/Monitoring.log"
     return 0
 }
 67 
 68 ###Get Swap usage###
 # Record swap usage as a percentage of total swap, based on `free -m`.
 # A host without swap (total 0) or a missing "Swap" row reports "0.00";
 # the earlier version divided by zero in bc in that case and left
 # SWAP_USAGE empty. The percentage is truncated to two decimals.
 # Globals written: SWAP_TOTAL, SWAP_USED (MiB), SWAP_USAGE (e.g. "25.00").
 # Returns: 0 always.
 get_swap_usage() {
     local swap_row
     swap_row=$(free -m | awk '/^Swap/ { print $2, $3; exit }')
     SWAP_TOTAL=${swap_row%% *}
     SWAP_USED=${swap_row##* }
     SWAP_USAGE=$(awk -v used="${SWAP_USED:-0}" -v total="${SWAP_TOTAL:-0}" \
         'BEGIN { if (total > 0) printf "%.2f", int(used * 10000 / total) / 100; else printf "0.00" }')
     out_format "Swap usage" "${SWAP_USAGE}%" >> "$INITDIR/Monitoring.log"
     return 0
 }
 76 
 77 ###Get disk usage###
 # Record the usage percentage of every reported file system and remember
 # the root file system's usage for the threshold check.
 # `df -hP` is run once. Rows that start with the word "tmpfs", or contain
 # the word "Filesystem" (the header) or "boot", are skipped as before.
 # Usage and mount point are now read from the same row; the earlier
 # version re-ran df and looked each mount point up with `grep -w`, which
 # also matched nested mount points (e.g. "/data" matched "/data/backup")
 # and produced several values for one row.
 # Globals written: ROOT_PART (usage of "/" without the "%" sign).
 # Returns: 0 always.
 # NOTE(review): mount points containing whitespace are not supported by
 # this field-based parsing (same limitation as before).
 get_disk_usage() {
     local df_out usage mount
     df_out=$(df -hP)
     while read -r usage mount; do
         [ -n "$mount" ] || continue
         out_format "Disk usage $mount" "$usage" >> "$INITDIR/Monitoring.log"
     done < <(grep -Ewv '^tmpfs|Filesystem|boot' <<< "$df_out" | awk '{ print $5, $NF }')
     ROOT_PART=$(awk '$NF == "/" { sub(/%/, "", $5); print $5; exit }' <<< "$df_out")
     return 0
 }
 87 
 88 ###Get network traffic###
 # Record the average receive/transmit rate of one network interface.
 # Runs `sar -n DEV 1 60` (about one minute) and converts columns 5 and 6 of
 # that interface's "Average:" row with the same factor as before (x1000x8),
 # labelled "bps".
 # Changes from the earlier version:
 #   * numbers are printed with "%.0f"; awk's plain `print` formats
 #     non-integral values with "%.6g", which can emit scientific notation
 #     (e.g. 1.6e+08) that the later integer comparison cannot parse;
 #   * the interface name is matched exactly, so "veth0" or "eth0.100" rows
 #     no longer add extra lines;
 #   * the interface defaults to eth0 and can be overridden with the
 #     NET_INTERFACE environment variable.
 # Globals written: NETWORK_TRAFFIC ("Input: N bps Output: M bps", or empty
 # when sar prints no row for the interface).
 # Returns: 0 always.
 get_network_traffic() {
     local iface=${NET_INTERFACE:-eth0}
     NETWORK_TRAFFIC=$(sar -n DEV 1 60 | awk -v iface="$iface" '
         $1 == "Average:" && $2 == iface {
             printf "Input: %.0f bps Output: %.0f bps\n", $5 * 1000 * 8, $6 * 1000 * 8
         }')
     out_format "Network traffic" "${NETWORK_TRAFFIC}" >> "$INITDIR/Monitoring.log"
     return 0
 }
 94 
 95 ###Get website PV,UV###
 # Record yesterday's page views (PV) and unique visitors (UV) from the
 # access log named by ACCESS_LOG_PATH.
 #   PV = number of lines that do not start with 127.0.0.1
 #   UV = number of distinct first fields among those lines
 # Changes from the earlier version: the log is read once instead of twice,
 # the path is quoted, and the dots in the loopback prefix are escaped so
 # they match literal dots only. A missing or unreadable log reports 0 / 0.
 # Globals written: WEBSITE_PV, WEBSITE_UV.
 get_website_pu() {
     local counts
     counts=$(awk '
         !/^127\.0\.0\.1/ { pv++; if (!($1 in seen)) { seen[$1]; uv++ } }
         END { print pv + 0, uv + 0 }' "$ACCESS_LOG_PATH")
     counts=${counts:-0 0}
     WEBSITE_PV=${counts%% *}
     WEBSITE_UV=${counts##* }
     out_format "Before Day Website PV" "$WEBSITE_PV" >> "$INITDIR/Monitoring.log"
     out_format "Before Day Website UV" "$WEBSITE_UV" >> "$INITDIR/Monitoring.log"
 }
102 
103 ###Server information: collected once and printed at the top of the report###
104 IP=`ip addr show eth0 | grep -w "inet" | awk -F "[ /]+" '{print$3}'`  # IPv4 address of eth0, without the prefix length
105 HOSTNAME=`hostname`
106 SN=`sudo -b /usr/sbin/dmidecode | grep -A5 "System Information" | grep "Serial Number"| awk -F ":" '{print$2}' | sed 's#^[[:space:]]##g'`  # serial number from dmidecode's "System Information" section; run through sudo
107 OS=`cat /etc/redhat-release`
108 
109 ###Program entrance: start a fresh Monitoring.log (">" overwrites the file of any previous run)###
110 echo "############### Monitoring information Date:$LOCAL_TIME ###############" > $INITDIR/Monitoring.log
111 out_format "IP"  "$IP" >> $INITDIR/Monitoring.log
112 out_format "Hostname"  "$HOSTNAME" >> $INITDIR/Monitoring.log
113 out_format "SN"  "$SN" >> $INITDIR/Monitoring.log
114 out_format "Os"  "$OS" >> $INITDIR/Monitoring.log
115 
116 ###Call function: each collector appends its row(s) to Monitoring.log and sets the globals used below###
117 get_load_average
118 get_cpu_usage
119 get_memory_usage
120 get_swap_usage
121 get_disk_usage
122 get_network_traffic
123 get_website_pu
124 
125 ###variable format: reduce the collected values to the parts compared against the thresholds###
126 LOAD_AVERAGE_FORMAT=`echo $LOAD_AVERAGE | awk -F "." '{print$1}'`  # text before the first "."
127 MEMORY_USAGE_FORMAT=`echo $MEMORY_USAGE | awk -F "." '{print$1}'`  # text before the first "."
128 SWAP_USAGE_FORMAT=`echo $SWAP_USAGE | awk -F "." '{print$1}'`  # text before the first "."
129 IN_NETWORK_TRAFFIC=`echo $NETWORK_TRAFFIC | awk '{print$2}'`  # 2nd word of "Input: N bps Output: M bps"
130 OUT_NETWORK_TRAFFIC=`echo $NETWORK_TRAFFIC | awk '{print$5}'`  # 5th word of "Input: N bps Output: M bps"
131 
###Monitoring threshold judgment###
# Compare each collected metric with its alarm threshold and send ONE alarm
# mail for the first metric (in the order below) that reaches its threshold.
# Operands are validated before the numeric test: an empty, multi-line or
# non-integer value (e.g. "1.6e+08") counts as "not exceeded" and the chain
# moves on to the next metric. Previously the unquoted operands made `[`
# print syntax errors in those cases.

# Succeeds when $1 consists of one or more decimal digits only.
_is_uint() {
    case "$1" in
        '' | *[!0-9]*) return 1 ;;
    esac
    return 0
}

# Succeeds when both arguments are unsigned integers and $1 >= $2.
#   $1 - measured value, $2 - alarm threshold
_exceeds() {
    _is_uint "$1" && _is_uint "$2" && [ "$1" -ge "$2" ]
}

# Globals read: the *_ALARM thresholds, IP, and the metric variables
# LOAD_AVERAGE_FORMAT, CPU_USAGE, MEMORY_USAGE_FORMAT, SWAP_USAGE_FORMAT,
# ROOT_PART, IN_NETWORK_TRAFFIC, OUT_NETWORK_TRAFFIC.
check_thresholds() {
    if _exceeds "$LOAD_AVERAGE_FORMAT" "$LOAD_AVERAGE_ALARM"; then
        send_mail "$IP Load:High load average."
    elif _exceeds "$CPU_USAGE" "$CPU_ALARM"; then
        send_mail "$IP CPU:usage greater than ${CPU_ALARM}%."
    elif _exceeds "$MEMORY_USAGE_FORMAT" "$MEMORY_ALARM"; then
        send_mail "$IP Memory:usage greater than ${MEMORY_ALARM}%."
    elif _exceeds "$SWAP_USAGE_FORMAT" "$SWAP_ALARM"; then
        send_mail "$IP Swap:usage greater than ${SWAP_ALARM}%."
    elif _exceeds "$ROOT_PART" "$ROOT_PART_ALARM"; then
        send_mail "$IP Root Partition:usage greater than ${ROOT_PART_ALARM}%."
    elif _exceeds "$IN_NETWORK_TRAFFIC" "$IN_NETWORK_TRAFFIC_ALARM"; then
        send_mail "$IP Network Traffic:Input Greater than ${IN_NETWORK_TRAFFIC_ALARM}bps."
    elif _exceeds "$OUT_NETWORK_TRAFFIC" "$OUT_NETWORK_TRAFFIC_ALARM"; then
        send_mail "$IP Network Traffic:Output Greater than ${OUT_NETWORK_TRAFFIC_ALARM}bps."
    fi
}

check_thresholds
148 
149 ###Record history log: append this run's report to the cumulative history file###
150 cat $INITDIR/Monitoring.log >> $INITDIR/Monitor_history.log
151 
152 ###Send the full report as a "[Notice]" mail when the current hour equals TIMING###
153 if [ "$WHOLE_TIME" == "$TIMING" ];then
154     python mail.py "$ACCENT_MAIL" "[Notice]:$IP Monitoring information" "`cat $INITDIR/Monitoring.log`"
155 fi
156 
157 ###Delete temporary files; INITDIR_CONFIRM falls back to /tmp when INITDIR is empty###
158 INITDIR_CONFIRM=${INITDIR:-/tmp}
159 rm -f $INITDIR_CONFIRM/Monitoring.log

2.发送邮件脚本

 1 #!/usr/bin/env python
 2 #-*- coding: UTF-8 -*-
 3 import os,sys
 4 reload(sys)
 5 sys.setdefaultencoding('utf8')
 6 import getopt
 7 import smtplib
 8 from email.MIMEText import MIMEText
 9 from email.MIMEMultipart import MIMEMultipart
10 from  subprocess import *
11 
def sendqqmail(username,password,mailfrom,mailto,subject,content):
    """Send a plain-text mail through smtp.qq.com over SSL (port 465).

    Args:
        username: SMTP login name.
        password: SMTP password / authorization code.
        mailfrom: sender address, also used as Reply-To.
        mailto: recipient address.
        subject: mail subject.
        content: mail body; byte strings are decoded as UTF-8.

    Returns:
        True when the message was handed to the server, False when any step
        failed (the error is printed, as before). Earlier versions always
        returned None.

    The body is declared as UTF-8 so non-ASCII text is not sent under the
    default us-ascii charset, and the connection is closed on the failure
    path as well. The syntax used here is valid on both Python 2.6+ and
    Python 3.
    """
    gserver = 'smtp.qq.com'
    gport = 465

    smtp = None
    try:
        if isinstance(content, bytes):
            content = content.decode('utf-8')
        msg = MIMEText(content, 'plain', 'utf-8')
        msg['from'] = mailfrom
        msg['to'] = mailto
        msg['Reply-To'] = mailfrom
        msg['Subject'] = subject

        smtp = smtplib.SMTP_SSL(gserver, gport)
        smtp.set_debuglevel(0)
        smtp.ehlo()
        smtp.login(username,password)

        smtp.sendmail(mailfrom, mailto, msg.as_string())
        return True
    except Exception as err:
        print("Send mail failed. Error: %s" % err)
        return False
    finally:
        if smtp is not None:
            # quit() ends the SMTP session politely; fall back to close()
            # if the session is already broken.
            try:
                smtp.quit()
            except Exception:
                smtp.close()
32 
33 
def main():
    """Command-line entry point: mail.py <to> <subject> <content>.

    Exits with status 2 and a usage message when fewer than three arguments
    are given (previously an IndexError traceback), and with status 1 when
    sendqqmail explicitly reports failure by returning False.

    NOTE(review): the sender credentials used to be hard-coded here. They can
    now be supplied through the MAIL_USER, MAIL_PASSWORD and MAIL_FROM
    environment variables; the previous literals remain as defaults so
    existing setups keep working. Prefer the environment variables and keep
    real secrets out of the source.
    """
    if len(sys.argv) < 4:
        sys.stderr.write("Usage: %s <to> <subject> <content>\n" % sys.argv[0])
        sys.exit(2)
    to=sys.argv[1]
    subject=sys.argv[2]
    content=sys.argv[3]
    username = os.environ.get('MAIL_USER', '123456')
    password = os.environ.get('MAIL_PASSWORD', 'gkhcvklnjyjsbgwic')
    mailfrom = os.environ.get('MAIL_FROM', '123456@qq.com')
    # "is False" (not plain falsiness) so a sendqqmail that returns None is
    # still treated as success.
    if sendqqmail(username,password,mailfrom,to,subject,content) is False:
        sys.exit(1)
if __name__ == "__main__":
    main()

3.readme配置

 1 1.新建一个监控用户(monitor)
 2     [root@mylab ~]# useradd monitor
 3     [root@mylab ~]# passwd monitor
 4 
 5 2.配置sudo
 6     [root@mylab ~]# cp /etc/sudoers /etc/sudoers.bak.`date +'%F-%T'`
 7     [root@mylab ~]# echo "monitor ALL=(root) NOPASSWD: /usr/sbin/dmidecode" >> /etc/sudoers
 8     [root@mylab ~]# vim /etc/sudoers 
 9     # Defaults    requiretty 
10     将上面行注释掉
11 3.上传脚本文件至/home/monitor
12     mail.py #发送邮件脚本
13     monitor.sh #监控脚本
14 
15 4.配置文件说明
16     mail.py #调用第三方邮箱发送邮件
17         以QQ邮件为例修改mail.py文件:(其他平台修改参数即可,一般公司都有邮件接口或者自己内网搭建一台邮件服务器)
18         gserver = 'smtp.qq.com'    
19         gport = 465
20         sendqqmail('123456','gkhcvklnjyjsbgwic','123456@qq.com',to,subject,content)
21             QQ邮箱用户名    IMAP/SMTP第三方登录授权码(QQ邮箱设置生成)QQ邮箱
22             
23     monitor.sh #可监控负载、CPU、内存、SWAP、硬盘、网卡流量、网站PV/UV(需要根据需求修改)    
24         ###Mailbox configuration###
25         ACCENT_MAIL="123456@126.com" #告警接收邮箱
26         
27         告警阈值,可根据需要修改
28         ###The alarm threshold### 
29         LOAD_AVERAGE_ALARM=2 #15分钟内的平均负载,参考:一般小于机器物理CPU个数
30         CPU_ALARM=70 #CPU使用率
31         MEMORY_ALARM=70 #内存使用率
32         SWAP_ALARM=30 #虚拟内存使用率
33         ROOT_PART_ALARM=95 #/挂载点硬盘使用率
34         IN_NETWORK_TRAFFIC_ALARM=167772160 #网卡接收流量阈值,单位bps(取1分钟采样的平均值),167772160bps即20MB/s
35         OUT_NETWORK_TRAFFIC_ALARM=167772160 #网卡发送流量阈值,单位bps(取1分钟采样的平均值),167772160bps即20MB/s
36         
37         应用日志路径
38         ###Log variable###
39         ACCESS_LOG_PATH=localhost_access_log.${BEFORE_DAY}.txt
40         
41     Monitor_history.log #日志历史记录
42 
43 5.定时发送邮件
44     ####Timed task###
45     TIMING=08 #每天08点发送邮件
46     
47     
48 6.添加自动任务
49     [monitor@mylab ~]$ crontab  -e
50     1 * * * *  /bin/sh  -c  "/home/monitor/monitor.sh >/dev/null 2>&1" #每小时的第1分钟执行一次

4.写的最后

目前在测试环境运行了一段时间,待优化后上生产。哈哈....

 


免责声明!

本站转载的文章为个人学习借鉴使用,本站对版权不负任何法律责任。如果侵犯了您的隐私权益,请联系本站邮箱yoyou2525@163.com删除。



 
粤ICP备18138465号  © 2018-2025 CODEPRJ.COM