由於服務器上手動啟動的各種進程太多,為了方便管理和監控,隨時了解各進程的狀態與資源使用情況,特配置進程自動發現,並對發現的進程進行監控報警。
配置過程如下:首先定義一個進程列表並同步到所有機器,然後在每台機器上自動發現列表中正在運行的進程,並監控每個進程是否down掉,以及其CPU、內存占用、線程數等。
1,配置要監控的進程列表。格式:進程標識 關鍵字描述
cat process_list_all.txt
# tomcat
/data/apache_projects/tomcat7-express-18034 tomcat流量服務
/usr/local/webserver/tengine/sbin/nginx nginx進程
/usr/local/mysql/bin/mysqld mysql訂單庫
2,配置自動發現腳本
cat process_discovery.sh
#!/bin/bash
#
# process_discovery.sh - emit Zabbix low-level-discovery JSON for the
# processes named in process_list_all.txt that are currently running.
#
# The list file lives next to this script. Each non-comment line has the form
#   <process-identifier> <free-text description>
# where <process-identifier> is a grep basic regular expression (normally a
# path fragment) searched for in the output of `ps aux`.
#
# Output (stdout):
#   {"data":[
#   {"{#TABLENAME}":"<identifier>","{#DESC}":"<description>"},
#   ...
#   ]}

#######################################
# Escape a string for use inside a JSON string literal.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout (no trailing newline)
#######################################
json_escape() {
  local text=$1
  text=${text//\\/\\\\}   # backslash first, so later escapes are not doubled
  text=${text//\"/\\\"}
  text=${text//$'\t'/\\t}
  printf '%s' "$text"
}

#######################################
# Print the discovery JSON document.
# Arguments: $1 - path of the process list file
#            $2 - text of a `ps aux` listing to search
# Outputs:   JSON document on stdout
#######################################
emit_discovery() {
  local list_file=$1 ps_snapshot=$2
  local ident desc sep=''

  printf '%s\n' '{"data":['
  while read -r ident desc; do
    # Comment lines that are indented pass the grep filter below; skip them.
    [[ $ident == \#* ]] && continue
    # Only report entries that match at least one running process.
    grep -q -- "$ident" <<<"$ps_snapshot" || continue
    printf '%s{"{#TABLENAME}":"%s","{#DESC}":"%s"}' \
      "$sep" "$(json_escape "$ident")" "$(json_escape "$desc")"
    # Entries are separated by ",\n"; the last one gets no trailing comma.
    sep=$',\n'
  done < <(grep -vE '^#|^$|^[[:space:]]+$' -- "$list_file")
  if [[ -n $sep ]]; then
    printf '\n'
  fi
  printf '%s\n' ']}'
}

# Take the process snapshot once, then look up the list beside this script.
emit_discovery "$(dirname -- "$0")/process_list_all.txt" "$(ps aux)"
3,配置監控項腳本
cat process_monitor.sh
#!/bin/bash
#
# process_monitor.sh - print one metric for a monitored process.
#
# Usage: process_monitor.sh <process-pattern> <metric>
#   <process-pattern>  grep basic regular expression matched against the lines
#                      of a saved `ps aux` listing
#   <metric>           cmdline | cpu | mem | rss | process_status | threads | futex
#
# Optional environment overrides:
#   PS_SNAPSHOT  file holding the `ps aux` listing (default /tmp/ps.txt)
#   PROC_ROOT    root of the proc filesystem        (default /proc)
#
# The value is written to stdout; the script always exits with status 0.
#set -x

PS_SNAPSHOT=${PS_SNAPSHOT:-/tmp/ps.txt}
PROC_ROOT=${PROC_ROOT:-/proc}

#######################################
# Sum one whitespace-separated column over all snapshot lines that match.
# Globals:   PS_SNAPSHOT (read)
# Arguments: $1 - 1-based column number, $2 - process pattern
# Outputs:   the sum on stdout; 0 when no line matches
#######################################
sum_column() {
  local column=$1 pattern=$2
  # "total + 0" forces a numeric 0 instead of an empty line when nothing matched.
  grep -- "$pattern" "$PS_SNAPSHOT" \
    | awk -v col="$column" '{ total += $col } END { print total + 0 }'
}

#######################################
# List the PIDs (2nd column) of snapshot lines matching the pattern as a word.
# Globals:   PS_SNAPSHOT (read)
# Arguments: $1 - process pattern
# Outputs:   one numeric PID per line on stdout
#######################################
matching_pids() {
  grep -w -- "$1" "$PS_SNAPSHOT" | awk '$2 ~ /^[0-9]+$/ { print $2 }'
}

#######################################
# Add up the "Threads:" value from the proc status file of every matching PID.
# PIDs whose status file cannot be read (process already gone) are skipped.
# Globals:   PROC_ROOT (read)
# Arguments: $1 - process pattern
# Outputs:   total thread count on stdout; 0 when nothing matches
#######################################
count_threads() {
  local pattern=$1 pid threads total=0
  while read -r pid; do
    threads=$(awk '/^Threads:/ { print $2 }' "${PROC_ROOT}/${pid}/status" 2>/dev/null)
    if [[ $threads =~ ^[0-9]+$ ]]; then
      total=$(( total + threads ))
    fi
  done < <(matching_pids "$pattern")
  printf '%s\n' "$total"
}

#######################################
# Count `ps` thread lines containing "futex_" for all matching PIDs.
# Arguments: $1 - process pattern
# Outputs:   the count on stdout; 0 when nothing matches
#######################################
count_futex_threads() {
  local pattern=$1 pid_list
  pid_list=$(matching_pids "$pattern" | paste -sd, -)
  if [[ -z $pid_list ]]; then
    # Avoid calling ps with an empty PID list, which only produces an error.
    printf '0\n'
    return 0
  fi
  ps mp "$pid_list" -o THREAD,tid | grep -c futex_
  return 0
}

#######################################
# Dispatch on the requested metric and print its value.
# Globals:   PS_SNAPSHOT (read)
# Arguments: $1 - process pattern, $2 - metric name
# Outputs:   metric value on stdout, or "Error input:" for an unknown metric
# Returns:   always 0
#######################################
report_metric() {
  local process=$1 metric=$2
  case "$metric" in
    cmdline)
      printf '%s\n' "$process"
      ;;
    cpu)
      sum_column 3 "$process"
      ;;
    mem)
      sum_column 4 "$process"
      ;;
    rss)
      sum_column 6 "$process"
      ;;
    process_status)
      # Prints grep's exit status: 0 = found, 1 = not found, 2 = grep error.
      grep -- "$process" "$PS_SNAPSHOT" >/dev/null
      printf '%s\n' "$?"
      ;;
    threads)
      count_threads "$process"
      ;;
    futex)
      count_futex_threads "$process"
      ;;
    # Disabled metrics, kept for reference:
    #port)
    #ss -tnlp|grep `grep -w $process /tmp/ps.txt|awk '{print $2}'`|awk '{print $4}'|awk -F: '{print $NF}'
    #;;
    #portcount)
    #ss -tnlp|grep `grep -w $process /tmp/ps.txt|awk '{print $2}'`|awk '{print $4}'|awk -F: '{print $NF}'|wc -l
    #;;
    *)
      printf '%s\n' 'Error input:'
      ;;
  esac
  return 0
}

# report_metric always returns 0, so the script's exit status is always 0.
report_metric "${1-}" "${2-}"
4,添加ps進程快照文件到crontab
*/1 * * * * ps aux > /tmp/ps1.txt && mv /tmp/ps1.txt /tmp/ps.txt
5,配置process_discovery.conf文件
cat /etc/zabbix/zabbix_agentd.d/process_discovery.conf
#ps process
UserParameter=process.discovery,/etc/zabbix/scripts/process_discovery.sh
UserParameter=process[*],/etc/zabbix/scripts/process_monitor.sh $1 $2