#!/bin/bash
#
# Simple host monitor: samples the per-core load average, the CPU utilisation
# and the physical-memory utilisation, and reports an alert through send_mail
# whenever a sample exceeds its warning threshold.
#
# Requires: bc, awk, ifconfig, top, uptime.

# Name and IPv4 address of the first interface listed by ifconfig.
# Current net-tools print the name as "eth0:" and the address as
# "inet 10.0.0.1 ..."; legacy versions print "eth0" and
# "inet addr:10.0.0.1 ...". Both layouts are handled.
ENO1=$(ifconfig | awk 'NR == 1 { sub(/:$/, "", $1); print $1 }')
IP=''
if [[ -n "${ENO1}" ]]; then
  IP=$(ifconfig "${ENO1}" | awk '
    /inet addr:/ { sub(/^.*inet addr:/, ""); print $1; exit }
    /inet /      { print $2; exit }
  ')
fi
echo "eno: ${ENO1}"
echo "IP: ${IP}"

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# Warning thresholds for each monitored value.
WARN_LOAD=1.1   # 15-minute load average per core
WARN_CPU=0.95   # CPU utilisation as a fraction of 1
WARN_MEM=0.93   # used/total physical memory as a fraction of 1
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #


# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# Latest samples, overwritten by the *_func collectors.
CPU=0.0
LOAD1=0.0
LOAD5=0.0
LOAD15=0.0
MEM=0.0
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #


#######################################
# Alert hook. This placeholder only prints the alert to stdout.
# Arguments: $1 - message body, $2 - subject
#######################################
send_mail() {
  echo 'send mail ' "${1}" "${2}"
}


#######################################
# Print $1 / $2 with two decimals (bc truncates, it does not round).
# bc omits the leading zero for values below 1 (".50"); it is restored here
# so that every caller gets "0.50" and never "01.00".
# Arguments: $1 - numerator, $2 - denominator (must be non-zero)
# Outputs:   the quotient on stdout
# Returns:   non-zero when bc is unavailable or produced no result
#######################################
_scaled_ratio() {
  local value
  value=$(echo "scale=2; ${1} / ${2}" | bc) || return 1
  # bc exits 0 even on a runtime error, so an empty result is the failure sign.
  [[ -n "${value}" ]] || return 1
  if [[ "${value}" == .* ]]; then
    value="0${value}"
  fi
  printf '%s\n' "${value}"
}


# 1. Monitor the system load.
#######################################
# Samples the 1/5/15-minute load averages, divides them by the number of
# logical CPUs and alerts when the 15-minute value exceeds WARN_LOAD.
# Globals:   LOAD1, LOAD5, LOAD15 (written); WARN_LOAD, IP (read)
# Outputs:   alert via send_mail; diagnostics on stderr
# Returns:   1 when the load averages could not be read or computed
#######################################
system_load_func() {
  local cpu_num load load_1 load_5 load_15
  local per_core_1 per_core_5 per_core_15
  local -r number_re='^[0-9]+(\.[0-9]+)?$'

  # Some architectures have no "model name" field in /proc/cpuinfo; fall back
  # to the "processor" entries and finally to 1 so bc never divides by zero.
  cpu_num=$(grep -c 'model name' /proc/cpuinfo 2>/dev/null)
  if ! [[ "${cpu_num}" =~ ^[1-9][0-9]*$ ]]; then
    cpu_num=$(grep -c '^processor' /proc/cpuinfo 2>/dev/null)
  fi
  if ! [[ "${cpu_num}" =~ ^[1-9][0-9]*$ ]]; then
    cpu_num=1
  fi

  # LC_ALL=C keeps "." as the decimal separator so the ", " split is unambiguous.
  load=$(LC_ALL=C uptime | awk -F 'load averages?: ' '{print $2}')
  IFS=', ' read -r load_1 load_5 load_15 <<<"${load}"
  if ! [[ "${load_1}" =~ ${number_re} && "${load_5}" =~ ${number_re} \
    && "${load_15}" =~ ${number_re} ]]; then
    echo 'system_load_func: cannot parse load averages from uptime' >&2
    return 1
  fi

  # Average load per core.
  if ! per_core_1=$(_scaled_ratio "${load_1}" "${cpu_num}") \
    || ! per_core_5=$(_scaled_ratio "${load_5}" "${cpu_num}") \
    || ! per_core_15=$(_scaled_ratio "${load_15}" "${cpu_num}"); then
    echo 'system_load_func: bc failed to compute the per-core load' >&2
    return 1
  fi
  LOAD1=${per_core_1}
  LOAD5=${per_core_5}
  LOAD15=${per_core_15}

  if [[ "$(echo "${LOAD15} > ${WARN_LOAD}" | bc)" == "1" ]]; then
    send_mail "${IP}服務器15分鍾的系統平均負載為${LOAD15}, 超過警戒值${WARN_LOAD}, 請立即處理!" "$IP 服務器系統負載告警!"
  fi
}


# 2. Monitor CPU utilisation.
#######################################
# Takes the idle percentage from the last CPU summary line of two batch-mode
# top iterations and stores (100 - idle) / 100 in CPU; alerts when it
# exceeds WARN_CPU.
# Globals:   CPU (written); WARN_CPU, IP (read)
# Outputs:   alert via send_mail; diagnostics on stderr
# Returns:   1 when the idle value could not be read or computed
#######################################
cpu_func() {
  local cpu_idle usage

  # Integer part of the idle percentage. The value is located by its "id"
  # label rather than by field position, because top prints "ni,100.0 id"
  # without a separating space at 100% idle and older versions print
  # "98.5%id"; a fixed field index fails on both.
  cpu_idle=$(top -b -d 0.1 -n 2 | awk '
    /^%?Cpu/ { last = $0 }
    END {
      if (match(last, /[0-9]+([.,][0-9]+)?[ %]*id/)) {
        idle = substr(last, RSTART, RLENGTH)
        sub(/[^0-9].*$/, "", idle)
        print idle
      }
    }
  ')
  if ! [[ "${cpu_idle}" =~ ^[0-9]+$ ]]; then
    echo 'cpu_func: cannot parse idle CPU percentage from top' >&2
    return 1
  fi

  # 10# forces base 10 so a value such as "08" is not read as octal.
  if ! usage=$(_scaled_ratio "$((100 - 10#${cpu_idle}))" 100); then
    echo 'cpu_func: bc failed to compute the CPU utilisation' >&2
    return 1
  fi
  CPU=${usage}

  if [[ "$(echo "${CPU} > ${WARN_CPU}" | bc)" == "1" ]]; then
    send_mail "${IP}服務器cpu使用${CPU},請及時處理." "$IP 服務器CPU告警"
  fi
}


# 3. Monitor physical memory.
#######################################
# Reads total and used memory from the second row of `free -m`, stores
# used / total in MEM and alerts when it exceeds WARN_MEM. MEM is left
# untouched when the used value is 0.
# Globals:   MEM (written); WARN_MEM, IP (read)
# Outputs:   alert via send_mail; diagnostics on stderr
# Returns:   1 when the memory figures could not be read or computed
#######################################
mem_func() {
  local mem_total mem_used usage

  # Row 2 of `free -m`: column 2 is the total, column 3 the used amount.
  read -r mem_total mem_used < <(free -m | awk 'NR == 2 { print $2, $3 }')
  if ! [[ "${mem_total}" =~ ^[1-9][0-9]*$ && "${mem_used}" =~ ^[0-9]+$ ]]; then
    echo 'mem_func: cannot parse memory figures from free' >&2
    return 1
  fi

  if ((10#${mem_used} != 0)); then
    usage=$(echo "scale=2; ${mem_used} / ${mem_total}" | bc)
    # bc exits 0 even on a runtime error, so an empty result is the failure sign.
    if [[ -z "${usage}" ]]; then
      echo 'mem_func: bc failed to compute the memory utilisation' >&2
      return 1
    fi
    # bc prints ".45" for values below 1; restore the leading zero only in
    # that case so that full usage reads "1.00" rather than "01.00".
    if [[ "${usage}" == .* ]]; then
      usage="0${usage}"
    fi
    MEM=${usage}

    if [[ "$(echo "${MEM} > ${WARN_MEM}" | bc)" == "1" ]]; then
      send_mail "${IP}服務器物理內存已使用 ${MEM},請及時處理." "$IP 服務器內存告警"
    fi
  fi
}


# Number of the report about to be printed; incremented by print_info.
CNT=1

#######################################
# Prints the latest samples framed by two separator lines and advances CNT.
# Globals: CNT (read and written); CPU, LOAD1, LOAD5, LOAD15, MEM (read)
# Outputs: the report on stdout
#######################################
print_info() {
  echo "# # # # # # # # # # # [${CNT}] # # # # # # # # # # # # # #"
  echo "cpu: ${CPU}"
  echo "load1: ${LOAD1}"
  echo "load5: ${LOAD5}"
  echo "load15: ${LOAD15}"
  echo "mem: ${MEM}"
  echo "# # # # # # # # # # # # # # # # # # # # # # # # # # #"
  ((CNT += 1))
  # exit 0
}


# Main loop: collect every sample, print the report, then wait 5 seconds.
while true; do
  system_load_func
  cpu_func
  mem_func
  print_info
  sleep 5
done
注: 腳本依賴 bc 命令, 需要先安裝:
sudo apt-get install bc
這裡我只在腳本裡面寫了獲取各項監控信息的部分, 發送郵件和將數據寫入數據庫的部分我省略掉了. 發送郵件我使用的是python, 由該shell腳本調用python來發送郵件(因為我發現用linux自帶的郵件工具會出現很多垃圾郵件, 怎麼解決我沒有去研究). 得到的數據我會寫入數據庫記錄; 在這裡為了通用性, 我把寫入數據庫的部分省略掉了.