2, Prometheus之部署Alertmanager
阿新 • • 發佈:2020-07-18
1,部署Alertmanager 啟動埠為:9093

2,配置Prometheus與Alertmanager通訊

2-1 在Alertmanager部署機器,設定Alertmanager告警的接收方式。

[root@centos7 alert]# cat alertmanager.yml
global:
  resolve_timeout: 5m
  #smtp_smarthost: 'smtp.163.com:25'
  smtp_smarthost: 'smtp.qq.com:465'
  smtp_from: 'sender@qq.com'
  smtp_auth_username: 'sender@qq.com'
  smtp_auth_password: 'xxxxx'  ## 授權碼
  smtp_require_tls: false
route:
  group_by: ['alertname']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1m
  receiver: 'mail'
receivers:
- name: 'mail'
  email_configs:
  - to: 'receiver@example.com'
#inhibit_rules: #告警抑制
#  - source_match:
#      severity: 'critical'
#    target_match:
#      severity: 'warning'
#    equal: ['alertname', 'dev', 'instance']
[root@centos7 alert]#
[root@centos7 alert]#
[root@centos7 alert]# ./amtool check-config ./alertmanager.yml
Checking './alertmanager.yml'  SUCCESS
Found:
 - global config
 - route
 - 0 inhibit rules
 - 1 receivers
 - 0 templates

[root@centos7 alert]#

2-2 在Prometheus server端設定與Alertmanager通訊

[root@centos7 prometheus]# cat prometheus.yml
global:
  scrape_interval: 15s
  evaluation_interval: 15s
alerting:
  alertmanagers:
  - static_configs:
    - targets:
      - 192.168.0.14:9093
rule_files:
  - "rules/*.yml"
scrape_configs:
  - job_name: 'bj'
    file_sd_configs:
    - files: ['/usr/local/prometheus/sd_config/*.yml']
      refresh_interval: 5s
[root@centos7 prometheus]#

2-3 編寫告警規則

[root@centos7 prometheus]# cat /usr/local/prometheus/rules/first.yml
groups:
- name: general.rules
  rules:
  # Alert for any instance that is unreachable for >1 minute.
  - alert: InstanceDown
    expr: up == 0
    for: 1m
    labels:
      severity: error
    annotations:
      summary: "Instance {{ $labels.instance }} down"
      description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute."
[root@centos7 prometheus]#

3,告警狀態
Inactive:這裡什麼都沒有發生。
Pending:已觸發閾值,但未滿足告警持續時間。
Firing:已觸發閾值且滿足告警持續時間。警報傳送給接收者。