1. 程式人生 > 實用技巧 >2, Prometheus之部署Alertmanager

2, Prometheus之部署Alertmanager

1,部署Alertmanager
啟動埠為:9093

2,配置Prometheus與Alertmanager通訊

2-1 在Alertmanager 部署機器,設定Alertmanager告警的接收方式。
[root@centos7 alert]# cat alertmanager.yml
# Alertmanager configuration: every alert is routed to a single e-mail receiver.
# NOTE(review): the e-mail addresses below appear as the literal placeholder
# '[email protected]' (obfuscated in this copy) — substitute real addresses.
global:
  # NOTE(review): presumably the time after which an alert that is no longer
  # being reported is treated as resolved — confirm against the Alertmanager docs.
  resolve_timeout: 5m
  # Alternative SMTP relay (163.com on port 25), left disabled.
  #smtp_smarthost: 'smtp.163.com:25'
  smtp_smarthost: 'smtp.qq.com:465'
  smtp_from: '[email protected]'
  smtp_auth_username: '[email protected]'
  smtp_auth_password: 'xxxxx' ## authorization code
  # NOTE(review): port 465 is conventionally the implicit-TLS SMTP port, which
  # would explain why STARTTLS enforcement is switched off here — confirm.
  smtp_require_tls: false

route:
  # Alerts are grouped by their alertname label.
  group_by: ['alertname']
  # Grouping / re-notification timers.
  # NOTE(review): these are very short values (repeat every 1m) and look like
  # demo settings — confirm before using them in production.
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1m
  # Receiver used by this route; must match a name under `receivers`.
  receiver: 'mail'
receivers:
- name: 'mail'
  email_configs:
  - to: '[email protected]'
# Inhibition rules, currently disabled (left commented out).
#inhibit_rules: # alert inhibition
#  - source_match:
#      severity: 'critical'
#    target_match:
#      severity: 'warning'
#    equal: ['alertname', 'dev', 'instance']
[root@centos7 alert]# 
[root@centos7 alert]# 
[root@centos7 alert]# ./amtool check-config ./alertmanager.yml
Checking './alertmanager.yml'  SUCCESS
Found:
 - global config
 - route
 - 0 inhibit rules
 - 1 receivers
 - 0 templates

[root@centos7 alert]# 

2-2 在Prometheus server端設定與Alertmanager通訊
[root@centos7 prometheus]# cat prometheus.yml
# Prometheus server configuration: scrape targets, load alerting rules and
# forward alerts to Alertmanager.
global:
  # Targets are scraped and rules are evaluated on the same 15s cadence.
  scrape_interval:     15s
  evaluation_interval: 15s 

# Alertmanager instance that receives alerts from this server
# (9093 is the port Alertmanager is started on).
alerting:
  alertmanagers:
  - static_configs:
    - targets:
      - 192.168.0.14:9093

# Alerting rule files, matched by glob.
# NOTE(review): the relative path presumably resolves to
# /usr/local/prometheus/rules/ — confirm against how the server is launched.
rule_files:
  - "rules/*.yml"

scrape_configs:
  # Targets for job 'bj' come from file-based service discovery: every
  # matching file is read, with a 5s refresh interval.
  - job_name: 'bj'
    file_sd_configs:
      - files: ['/usr/local/prometheus/sd_config/*.yml']
        refresh_interval: 5s
[root@centos7 prometheus]# 

2-3 編寫告警規則
[root@centos7 prometheus]# cat /usr/local/prometheus/rules/first.yml 
# Alerting rules loaded by the Prometheus server.
groups:
- name: general.rules
  rules:
  # Alert for any instance that is unreachable (up == 0) for more than 1 minute.
  - alert: InstanceDown
    expr: up == 0
    # The condition must hold for this long before the alert goes from
    # Pending to Firing; the texts above and below must match this value.
    for: 1m
    labels:
      severity: error
    annotations:
      summary: "Instance {{ $labels.instance }} down"
      description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute."
[root@centos7 prometheus]# 


3,告警狀態
	Inactive:這裡什麼都沒有發生。
	Pending:已觸發閾值,但未滿足告警持續時間
	Firing:已觸發閾值且滿足告警持續時間。警報傳送給接收者。