一個比較粗略的腳本:主要監控系統磁盤、CPU、內存、網絡流量、TCP 連接數等。
代碼如下:
#!/bin/bash
#
# system_monitor.sh - one-shot host health report.
#
# Prints, in order: current time and boot time, hostname, per-interface IP
# address and traffic rate, CPU usage and load, memory and swap usage, disk
# usage of selected mount points, and TCP connection counts per state.
#
# Sampling intervals and alarm thresholds are read from
# modify_system_variable.sh (CPUTIME, CPULIMIT, MEMLIMIT, ROOT, VAR, HOME,
# BOOT, DATA, NETTIME). Built-in defaults are used for anything it omits.
#
# Alternate config location used on some hosts:
#   /home/hduser/modify_system_variable.sh
CONFIG_FILE=${SYSTEM_MONITOR_CONFIG:-/data/script_ssh/system_monitor_shell_script/modify_system_variable.sh}

# Timestamp prefixed to alarm lines.
time1=${time1:-$(date '+%Y-%m-%d %H:%M:%S')}

#######################################
# Source the config file and fill in defaults for missing settings.
# Globals: CONFIG_FILE (read); CPUTIME, CPULIMIT, MEMLIMIT, ROOT, VAR,
#          HOME_LIMIT, BOOT, DATA, NETTIME (written); HOME (restored)
#######################################
load_config() {
  local real_home=${HOME-}

  if [[ -r "$CONFIG_FILE" ]]; then
    # shellcheck source=/dev/null
    . "$CONFIG_FILE"
  else
    printf 'warning: %s is not readable, using built-in defaults\n' \
      "$CONFIG_FILE" >&2
  fi

  # The config file stores the /home threshold in HOME, which overwrites the
  # exported home-directory variable. Copy the threshold aside and put the
  # real value back so child processes are not handed HOME=90.
  if [[ "${HOME-}" =~ ^[0-9]+$ ]]; then
    HOME_LIMIT=$HOME
  fi
  HOME=$real_home

  : "${CPUTIME:=2}" "${CPULIMIT:=90}" "${MEMLIMIT:=80}" "${NETTIME:=3}"
  : "${ROOT:=80}" "${VAR:=90}" "${HOME_LIMIT:=90}" "${BOOT:=80}" "${DATA:=90}"
}

#######################################
# Print a green section banner.
# Arguments: $1 - banner text
#######################################
print_header() {
  printf '\033[32m%s\033[0m\n' "$1"
}

#######################################
# Numeric (floating point aware) "greater than" test.
# Arguments: $1 - value, $2 - limit
# Returns:   0 when value > limit, 1 otherwise
#######################################
exceeds() {
  awk -v v="$1" -v lim="$2" 'BEGIN { exit !(v + 0 > lim + 0) }'
}

#######################################
# Integer percentage of part/whole; 0 when whole is 0 (e.g. no swap).
# Arguments: $1 - part, $2 - whole
# Outputs:   the truncated percentage
#######################################
percent_of() {
  local part=${1:-0} whole=${2:-0}
  if (( whole > 0 )); then
    echo $(( part * 100 / whole ))
  else
    echo 0
  fi
}

#######################################
# Print the ten processes ranked highest on one "ps aux" column.
# Arguments: $1 - column number to sort on (3 = %CPU, 4 = %MEM)
#            $2 - heading for that column
#######################################
print_top_processes() {
  local col=$1 label=$2
  ps aux | sort -k "${col}nr" | head \
    | awk -v col="$col" -v label="$label" '
        BEGIN { printf "%-10s%-10s%-10s%-10s\n", "USER", "PID", label, "COMMAND" }
        { printf "%-10s%-10d%-10.1f%-10s\n", $1, $2, $col, $11 }'
}

#######################################
# Read the byte counters of one interface from /proc/net/dev.
# The name is matched exactly, so "eth0" does not also pick up "veth0".
# Arguments: $1 - interface name
# Outputs:   "<received bytes> <transmitted bytes>", or nothing if not listed
#######################################
read_nic_bytes() {
  awk -F: -v nic="$1" '
    {
      name = $1
      gsub(/[ \t]/, "", name)
    }
    name == nic {
      split($2, f, " ")
      print f[1], f[9]
      exit
    }' /proc/net/dev
}

#######################################
# Format a byte delta as a per-second rate: K/S up to 1 MiB/s, M/S above.
# Arguments: $1 - bytes transferred, $2 - elapsed seconds
#######################################
format_rate() {
  awk -v bytes="$1" -v secs="$2" 'BEGIN {
    if (secs <= 0) secs = 1
    rate = bytes / secs
    if (rate > 1048576) printf "%0.3fM/S", rate / 1024 / 1024
    else                printf "%0.3fK/S", rate / 1024
  }'
}

#######################################
# 1. Basic system information: current time, boot time, hostname.
#######################################
report_system() {
  local uptime_secs
  uptime_secs=$(awk -F. '{print $1}' /proc/uptime)

  print_header "############# 系統時間 #############"
  echo "System_Now_Time: $(date '+%Y-%m-%d %H:%M:%S')"
  # Boot time is derived from /proc/uptime ("who -b" is an alternative).
  echo "System_Boot_Time: $(date -d "${uptime_secs} second ago" '+%Y-%m-%d %H:%M:%S')"

  print_header "############ 主機名 ###############"
  echo "Hostname: $(hostname)"
}

#######################################
# 1b. IP address and traffic rate of every interface ifconfig reports.
# All interfaces are sampled, then one NETTIME sleep, then sampled again,
# so the run time no longer grows with the number of interfaces.
# Globals: NETTIME (read)
#######################################
report_network() {
  local -a nics=() rx_start=() tx_start=()
  local nic rx tx i rx_delta tx_delta ip_list

  print_header "########## Ip地址和網卡流量 ##########"

  # Accept both "eth0: flags=..." and the older "eth0   Link encap:..." form.
  while IFS= read -r nic; do
    rx='' tx=''
    read -r rx tx < <(read_nic_bytes "$nic")
    [[ -n "$rx" && -n "$tx" ]] || continue
    nics+=("$nic")
    rx_start+=("$rx")
    tx_start+=("$tx")
  done < <(ifconfig 2>/dev/null \
    | awk '/^[A-Za-z]/ { sub(/:$/, "", $1); print $1 }')

  (( ${#nics[@]} > 0 )) || return 0
  sleep "$NETTIME"

  for i in "${!nics[@]}"; do
    nic=${nics[i]}
    rx='' tx=''
    read -r rx tx < <(read_nic_bytes "$nic")
    [[ -n "$rx" && -n "$tx" ]] || continue

    # A counter reset between samples would give a negative delta.
    rx_delta=$(( rx - rx_start[i] ))
    tx_delta=$(( tx - tx_start[i] ))
    (( rx_delta < 0 )) && rx_delta=0
    (( tx_delta < 0 )) && tx_delta=0

    ip_list=$(/sbin/ifconfig "$nic" 2>/dev/null \
      | awk '/inet / { sub(/^addr:/, "", $2); printf "%s%s", sep, $2; sep = " " }')

    printf 'Network_Card: %s Ip: %s 輸入流量:  %s 輸出流量:  %s \n' \
      "$nic" "$ip_list" \
      "$(format_rate "$rx_delta" "$NETTIME")" \
      "$(format_rate "$tx_delta" "$NETTIME")"
  done
}

#######################################
# Sample the aggregate CPU counters from /proc/stat.
# Outputs: "<idle ticks> <total ticks>" where total is the sum of
#          user, nice, system, idle, iowait, irq and softirq.
#######################################
sample_cpu_ticks() {
  local label user nice system idle iowait irq softirq rest
  read -r label user nice system idle iowait irq softirq rest \
    < <(grep '^cpu ' /proc/stat)
  echo "${idle:-0} $(( ${user:-0} + ${nice:-0} + ${system:-0} + ${idle:-0} \
    + ${iowait:-0} + ${irq:-0} + ${softirq:-0} ))"
}

#######################################
# 2. CPU: vmstat breakdown, utilisation over CPUTIME seconds, load average,
# process counts, uptime, logged-in users, and an alarm above CPULIMIT.
# Globals: CPUTIME, CPULIMIT, time1 (read)
#######################################
report_cpu() {
  local cpu_us cpu_sy cpu_id cpu_wa
  local idle_a total_a idle_b total_b idle_delta total_delta rate
  local load_average running_process total_process users uptime_secs

  print_header "############### CPU平均負載和系統進程數 ##############"

  # vmstat columns: us = user time, sy = system time, id = idle, wa = I/O wait.
  read -r cpu_us cpu_sy cpu_id cpu_wa \
    < <(vmstat | tail -n 1 | awk '{print $13, $14, $15, $16}')

  read -r idle_a total_a < <(sample_cpu_ticks)
  sleep "$CPUTIME"
  read -r idle_b total_b < <(sample_cpu_ticks)
  idle_delta=$(( idle_b - idle_a ))
  total_delta=$(( total_b - total_a ))
  rate=$(awk -v busy="$(( total_delta - idle_delta ))" -v total="$total_delta" \
    'BEGIN { if (total > 0) printf "%.2f", busy / total * 100; else printf "0.00" }')

  echo "用戶進程占用CPU時間: us=$cpu_us"
  echo "系統進程消耗CPU時間: sy=$cpu_sy"
  echo "CPU空閑時間: id=$cpu_id"
  echo "等待I/O時間: wa=$cpu_wa"
  echo "CPU使用率: ${rate}%"

  load_average=$(uptime | awk -F: '{print $NF}')
  running_process=$(top -b -d 1 -n 1 | sed -n '2p' \
    | awk -F, '{print $2}' | awk '{print $1}')
  # "pid=" suppresses the header so it is not counted as a process.
  total_process=$(ps -e -o pid= | wc -l)

  echo
  echo "CPU_load_average: ${load_average}"
  echo "running_process:  ${running_process}"
  echo "total_process:  ${total_process}"

  uptime_secs=$(awk -F. '{print $1}' /proc/uptime)
  printf 'Host_running_time: %d天%d時%d分%d秒\n' \
    "$(( uptime_secs / 86400 ))" \
    "$(( uptime_secs % 86400 / 3600 ))" \
    "$(( uptime_secs % 3600 / 60 ))" \
    "$(( uptime_secs % 60 ))"

  # Find the "N user(s)" field wherever it is: its position in uptime's
  # output shifts when the machine has been up for less than a day.
  users=$(uptime | awk -F, '{
    for (i = 1; i <= NF; i++)
      if ($i ~ /user/) { split($i, part, " "); print part[1]; exit }
  }')
  echo -e "User_connection_number: ${users}\n"

  # Alarm plus the ten heaviest CPU consumers.
  if exceeds "$rate" "$CPULIMIT"; then
    echo "${time1} CPU警告:當前CPU使用率${rate}%,大於${CPULIMIT}%"
    echo " "
    echo "process使用CPU情況:"
    print_top_processes 3 "%CPU"
  fi
  echo
}

#######################################
# 3. Memory and swap usage, alarms above MEMLIMIT, top memory consumers.
# Globals: MEMLIMIT, time1 (read)
#######################################
report_memory() {
  local free_out mem_total mem_used mem_free mem_pct
  local swap_total swap_used swap_free swap_pct

  print_header "########### 系統內存使用情況 ##############"

  # LC_ALL=C keeps the "Mem:" / "Swap:" row labels untranslated.
  free_out=$(LC_ALL=C free -m)
  read -r mem_total mem_used mem_free \
    < <(awk '/^Mem:/ {print $2, $3, $4}' <<<"$free_out")
  read -r swap_total swap_used swap_free \
    < <(awk '/^Swap:/ {print $2, $3, $4}' <<<"$free_out")
  mem_pct=$(percent_of "$mem_used" "$mem_total")
  swap_pct=$(percent_of "$swap_used" "$swap_total")

  echo "總內存:${mem_total}M"
  echo "使用內存: ${mem_used}M"
  echo "剩余內存: ${mem_free}M"
  echo -e "內存使用率: ${mem_pct}%\n"
  echo "交換分區使用情況:"
  echo "總內存: ${swap_total}M"
  echo "使用內存: ${swap_used}M"
  echo "剩余內存: ${swap_free}M"
  echo -e "使用率: ${swap_pct}%\n"

  if exceeds "$mem_pct" "$MEMLIMIT"; then
    echo "${time1} 內存警告,當前內存使用${mem_pct}% 大於${MEMLIMIT}%"
  fi
  if exceeds "$swap_pct" "$MEMLIMIT"; then
    echo "${time1} 內存警告,當前內存${swap_pct}% 大於${MEMLIMIT}%"
  fi

  echo "process使用內存情況:"
  print_top_processes 4 "%MEM"
  echo
}

#######################################
# Report one mount point against its limit; silent when it is not mounted.
# Arguments: $1 - "df -P" output, $2 - mount point, $3 - limit in percent
#######################################
check_mount() {
  local df_out=$1 mount=$2 limit=$3 usage
  # Exact match on the mount column, so /data does not also match /data2.
  usage=$(awk -v m="$mount" '$NF == m { sub(/%/, "", $5); print $5; exit }' \
    <<<"$df_out")
  [[ "$usage" =~ ^[0-9]+$ ]] || return 0

  if (( usage >= limit )); then
    echo " ${mount}分區警告:當前使用${usage}%,大於${limit}"
  else
    echo " 當前${mount}分區使用: ${usage}%"
  fi
}

#######################################
# 4. Disk usage of /home, /, /var, /boot and /data, /data2 .. /data5.
# Globals: HOME_LIMIT, ROOT, VAR, BOOT, DATA (read)
#######################################
report_disk() {
  local df_out mount

  print_header "############### 磁盤使用情況 ################"

  # -P keeps every filesystem on one line even with long device names.
  df_out=$(df -P 2>/dev/null)

  check_mount "$df_out" /home "$HOME_LIMIT"
  check_mount "$df_out" / "$ROOT"
  check_mount "$df_out" /var "$VAR"
  check_mount "$df_out" /boot "$BOOT"
  for mount in /data /data2 /data3 /data4 /data5; do
    check_mount "$df_out" "$mount" "$DATA"
  done
  echo
}

#######################################
# 5. TCP connection count per state, taken from "netstat -n".
#   LISTEN       waiting for an incoming connection
#   SYN_RECV     a connection request arrived, awaiting acknowledgement
#   SYN_SENT     the application started opening a connection
#   ESTABLISHED  normal data transfer
#   FIN_WAIT1    the application said it is finished
#   FIN_WAIT2    the other side agreed to release
#   CLOSING      both sides tried to close simultaneously
#   TIME_WAIT    waiting for all in-flight packets to die off
#   CLOSE_WAIT   the other side initiated a release
#   LAST_ACK     waiting for the final acknowledgement
# States with no connections are reported as 0.
#######################################
report_tcp() {
  local state count pad

  print_header "###########Tcp連接數##################"

  while read -r state count; do
    # ESTABLISHED is the longest label and gets one space less of padding.
    pad=" "
    [[ "$state" == ESTABLISHED ]] && pad=""
    echo " 當前${state}${pad} 連接數為 ${count} 個。"
  done < <(netstat -n 2>/dev/null | awk '
    /^tcp/ { seen[$NF]++ }
    END {
      n = split("TIME_WAIT FIN_WAIT1 FIN_WAIT2 CLOSE_WAIT ESTABLISHED SYN_RECV LAST_ACK CLOSING", order, " ")
      for (i = 1; i <= n; i++) print order[i], seen[order[i]] + 0
    }')

  echo " "
  echo -e "\033[31m ----------------------------END--------------------------\033[0m"
}

main() {
  load_config
  report_system
  report_network
  report_cpu
  report_memory
  report_disk
  report_tcp
}

main "$@"
modify_system_variable.sh 文件如下:
#!/bin/bash
# modify_system_variable.sh - sampling intervals and alarm thresholds.
# Meant to be sourced by system_monitor.sh; every setting is a plain
# shell variable assignment.

# Interval between the two CPU samples used to compute utilisation (seconds).
CPUTIME=2

# CPU limit: raise an alarm when CPU usage reaches this value (percent).
CPULIMIT=90

# Memory limit: raise an alarm when memory usage reaches this value (percent).
MEMLIMIT=80

# Disk monitoring: per-partition usage limits (percent).
ROOT=80   # / partition
VAR=90    # var partition
# NOTE(review): HOME is also the standard home-directory environment
# variable; assigning it here replaces that value in any shell that sources
# this file. The name is kept because system_monitor.sh reads it.
HOME=90   # home partition
BOOT=80   # boot partition
DATA=90   # data partitions

# Interval between the two network traffic samples (seconds).
NETTIME=3