參考:https://github.com/songjiayang/prometheus_practice
https://github.com/kjanshair/docker-prometheus
https://github.com/vegasbrianc/prometheus
alertmanager
mkdir /opt/alertmanager -p
vim /opt/alertmanager/Dockerfile
FROM prom/alertmanager:v0.19.0
EXPOSE 9093
vim /opt/alertmanager/docker-compose.yml
version: "3.7"
services:
  alertmanager:
    image: v-alertmanager
    container_name: alertmanager
    build:
      context: .
      dockerfile: Dockerfile
    restart: always
    ports:
      - 9093:9093
    volumes:
      - /opt/alertmanager/etc/alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml
      - /var/lib/alertmanager:/alertmanager
    command:
      - '--config.file=/etc/alertmanager/alertmanager.yml'
      - '--storage.path=/alertmanager'
mkdir -p /var/lib/alertmanager;chmod -R 777 /var/lib/alertmanager
vim /opt/alertmanager/etc/alertmanager/alertmanager.yml
global:
  resolve_timeout: 5m
route:
  group_by: ['alertname']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1h
  receiver: 'web.hook'
receivers:
  - name: 'web.hook'
    webhook_configs:
      - url: 'http://127.0.0.1:5001/'
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']
global:
  resolve_timeout: 5m
route:
  repeat_interval: 2h
  receiver: email-1
  routes:
    - match:
        alertname: httpd_down
      receiver: email-1
    - match:
        alertname: nginx_down
      receiver: email-2
receivers:
  - name: email-1
    email_configs:
      - to: <to-email>
        from: <from-email>
        smarthost: <smtp:port>
        auth_username: "<user-name>"
        auth_identity: "<user-name>"
        auth_password: "<user-app-specific-password>"
  - name: email-2
    email_configs:
      - to: <to-email>
        from: <from-email>
        smarthost: <smtp:port>
        auth_username: "<user-name>"
        auth_identity: "<user-name>"
        auth_password: "<user-app-specific-password>"
生成啟動
cd /opt/alertmanager
docker-compose build --no-cache
docker-compose up -d --force-recreate
docker-compose down
docker-compose restart
查看進程
netstat -anltp|grep 9093
查看日志
docker logs --tail="500" alertmanager
docker logs -f alertmanager
進入容器
docker exec -it alertmanager /bin/sh
docker cp alertmanager:/etc/alertmanager/alertmanager.yml /opt/alertmanager/alertmanager_bak.yml
node_exporter
數據采集,不向中央服務器發送,而是等待抓取。默認暴露地址http://CURRENT_IP:9100/metrics
mkdir /opt/node-exporter -p
vim /opt/node-exporter/Dockerfile
FROM prom/node-exporter:v0.18.1
EXPOSE 9100
vim /opt/node-exporter/docker-compose.yml
version: '3.7'
services:
  node-exporter:
    image: v-node-exporter
    container_name: node-exporter
    build:
      context: .
      dockerfile: Dockerfile
    restart: unless-stopped
    ports:
      - 9100:9100
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      - '--collector.filesystem.ignored-mount-points="^/(sys|proc|dev|host|etc)($$|/)"'
注意別與elasticsearch-head的9100端口重疊
生成啟動
cd /opt/node-exporter/
docker-compose build
docker-compose up -d --force-recreate
docker-compose down
docker-compose restart
查看進程
netstat -anltp|grep 9100
查看日志
docker logs --tail="500" node-exporter
docker logs -f node-exporter
進入容器
docker exec -it node-exporter /bin/sh
查看幫助
./node_exporter -h
prometheus
mkdir /opt/prometheus -p
vim /opt/prometheus/Dockerfile
FROM prom/prometheus:v2.14.0
EXPOSE 9090
vim /opt/prometheus/docker-compose.yml
version: "3.7"
services:
  prometheus:
    image: v-prometheus
    container_name: prometheus
    build:
      context: .
      dockerfile: Dockerfile
    restart: always
    ports:
      - 9090:9090
    volumes:
      - ./etc/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
      - /var/lib/prometheus:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
mkdir -p /var/lib/prometheus;chmod -R 777 /var/lib/prometheus
chown -R 1000:1000 /opt/prometheus/etc/prometheus
vim /opt/prometheus/etc/prometheus/prometheus.yml
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - alertmanagerserver1:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "1_rules.yml"
  # - "2_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: 'prometheus'
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
      - targets: ['prometheusserver1:9090']
vim /opt/prometheus/etc/prometheus/1_rules.yml
groups:
  - name: httpd
    rules:
      - alert: httpd_down
        expr: probe_success{instance="http://httpd:80",job="httpd"} == 0
        for: 1s
        labels:
          severity: critical
        annotations:
          summary: "httpd is down"
  - name: nginx
    rules:
      - alert: nginx_down
        expr: probe_success{instance="http://nginx:80",job="nginx"} == 0
        for: 1s
        labels:
          severity: warning
        annotations:
          summary: "nginx is down"
生成啟動
cd /opt/prometheus/
docker-compose build
docker-compose up -d --force-recreate
docker-compose down
docker-compose restart
查看進程
netstat -anltp|grep 9090
查看日志
docker logs --tail="500" prometheus
docker logs -f prometheus
進入容器
docker exec -it prometheus /bin/sh
docker cp prometheus:/etc/prometheus/prometheus.yml /opt/prometheus/prometheus_bak.yml
PushGateway
mkdir /opt/pushgateway -p
vim /opt/pushgateway/Dockerfile
FROM prom/pushgateway:v1.0.0
EXPOSE 9091
blackbox-exporter
mkdir /opt/blackbox-exporter -p
vim /opt/blackbox-exporter/Dockerfile
FROM prom/blackbox-exporter:v0.16.0
EXPOSE 9115
vim /opt/blackbox-exporter/docker-compose.yml
version: "3.7"
services:
  blackbox-exporter:
    image: v-blackbox-exporter
    container_name: blackbox
    build:
      context: .
      dockerfile: Dockerfile
    restart: unless-stopped
    ports:
      - "9115:9115"
Grafana
構建Dockerfile
mkdir /opt/grafana
vim /opt/grafana/Dockerfile
FROM grafana/grafana:6.5.1
EXPOSE 3000
vim /opt/grafana/docker-compose.yml
version: '3.7'
services:
  grafana:
    image: v-grafana
    container_name: grafana
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      - 3000:3000
    environment:
      - GF_SECURITY_ADMIN_USER=${ADMIN_USER:-admin}
      - GF_SECURITY_ADMIN_PASSWORD=${ADMIN_PASSWORD:-admin}
      - GF_USERS_ALLOW_SIGN_UP=true
    volumes:
      - /var/lib/grafana:/var/lib/grafana
      - /var/log/grafana:/var/log/grafana
      - ./etc/grafana/provisioning:/etc/grafana/provisioning
      - /etc/localtime:/etc/localtime:ro
    restart: unless-stopped
mkdir -p /var/lib/grafana;chmod -R 777 /var/lib/grafana
mkdir -p /var/log/grafana;chmod -R 777 /var/log/grafana
mkdir -p /opt/grafana/etc/grafana/provisioning/datasources;
mkdir -p /opt/grafana/etc/grafana/provisioning/notifiers;
mkdir -p /opt/grafana/etc/grafana/provisioning/dashboards;
chmod -R 777 /opt/grafana/etc/grafana/provisioning
/var/lib/grafana 為數據庫所在目錄
生成啟動
cd /opt/grafana/
docker-compose build
docker-compose up -d --force-recreate
docker-compose down
docker-compose restart
查看進程
netstat -anltp|grep 3000
查看日志
docker logs --tail="500" grafana
docker logs -f grafana
進入容器
docker exec -it grafana /bin/bash
進入網站 用戶名密碼都是admin
prometheus集群 Thanos(略)