最近看到一个钉钉软件机器人,觉得非常好,比起邮件、微信告警,舒服多了,特献上代码内容。
[root@k8s-master ~]# nohup python test.py & # 可参考下一篇文章,将该 Python 进程配置为 systemd 服务在后台运行,每 5 秒监控一次,效果更佳
# coding: utf-8
"""Watch a set of service processes and report outages to a DingTalk robot.

On every pass the script takes a fresh snapshot of the running process
names, works out which monitored names are missing, sends a "stopped"
notification, runs the configured restart command, and then reports
whether the process is present again.
"""
import json
import logging
import os
import time

import psutil
import requests

# Names of the service processes the user wants monitored.
monitor_name = {'httpd', 'cobblerd'}

# Restart command per monitored process; run automatically once the
# process is found to be down.
monitor_map = {
    'httpd': 'systemctl restart httpd',
    'cobblerd': 'systemctl restart cobblerd',
}

# NOTE(review): the access token below is a credential committed in plain
# text -- consider loading it from the environment or a config file.
dingding_url = 'https://oapi.dingtalk.com/robot/send?access_token=b5258c4335ed8ab792075013c965efcbf4f8940f92e7bd936cdc7842d3bf9405'
# DingTalk robot token usage reference: http://www.pc6.com/infoview/Article_108931.html

CHECK_INTERVAL = 5    # seconds to sleep between two monitoring passes
NOTIFY_TIMEOUT = 10   # seconds before a webhook request is abandoned

logger = logging.getLogger(__name__)


def running_process_names():
    """Return the set of process names that are running right now.

    A new set is built on every call. Reusing one accumulated set across
    passes would keep the names of processes that have since died, so a
    stopped service would never be detected.
    """
    return {proc.info['name'] for proc in psutil.process_iter(attrs=['name'])}


def notify(p_name, p_status, detail):
    """Post one markdown status message to the DingTalk robot.

    Args:
        p_name: name of the service process the message is about.
        p_status: short status text shown on the status line.
        detail: closing line of the message.

    Delivery is best effort: a failed or timed-out request is logged and
    swallowed so that a webhook problem cannot stop the monitor itself.
    """
    data = {
        "msgtype": "markdown",
        "markdown": {
            "title": "监控信息",
            "text": "### %s\n" % time.strftime("%Y-%m-%d %X") +
                    "> #### 服务名:%s \n\n" % p_name +
                    "> #### 状态:%s \n\n" % p_status +
                    "> #### " + detail
        },
    }
    headers = {'Content-Type': 'application/json;charset=UTF-8'}
    send_data = json.dumps(data).encode('utf-8')
    try:
        # Without a timeout a stalled connection would block monitoring forever.
        requests.post(url=dingding_url, data=send_data, headers=headers,
                      timeout=NOTIFY_TIMEOUT)
    except requests.RequestException:
        logger.exception("could not deliver DingTalk notification for %s", p_name)


def check_once():
    """Run a single monitoring pass: detect, alert, restart, report."""
    # Set difference: names we monitor that are absent from the live snapshot.
    proc_stop = monitor_name - running_process_names()

    for p in proc_stop:
        notify(p, '停止', '正在尝试启动')

        command = monitor_map.get(p)
        if command is None:
            # A monitored name without a restart command must not crash the loop.
            logger.error("no restart command configured for %s", p)
            continue

        # Run the restart command, then re-scan to see whether it worked.
        os.system(command)

        if p in running_process_names():
            notify(p, '成功', '已经启动成功,服务正在运行!')
        else:
            notify(p, '重启失败', 'Sorry,服务启动失败鸟!')


def main():
    """Monitor forever, sleeping CHECK_INTERVAL seconds between passes."""
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(levelname)s %(message)s')
    while True:
        check_once()
        time.sleep(CHECK_INTERVAL)


if __name__ == '__main__':
    main()
钉钉软件结果图片