1. 程式人生 > 其它 >prometheus node-exporter cadvisor grafana alertmanager 安裝及服務發現

prometheus node-exporter cadvisor grafana alertmanager 安裝及服務發現

prometheus node-exporter cadvisor grafana alertmanager 安裝及服務發現

本次搭建基於docker環境

前期準備

  拉取映象

docker pull google/cadvisor
docker pull prom/prometheus
docker pull grafana/grafana
docker pull prom/alertmanager

  建立持久化目錄

mkdir -p /home/prometheus/config
vim /home/prometheus/prometheus.yml
mkdir /home/grafana-storage

啟動node-exporter硬體系統監控

docker run -d -p 9100:9100 \
-v /proc:/host/proc:ro \
-v /sys:/host/sys:ro \
-v /:/rootfs:ro \
--name=node-exporter \
prom/node-exporter

  

啟動cadvisor容器監控

docker run \
-v /:/rootfs:ro \
-v /var/run:/var/run:rw \
-v /sys:/sys:ro \
-v /var/lib/docker/:/var/lib/docker:ro \
-p 9080:8080 \
--detach=true \
--name=cadvisor \
google/cadvisor
#--detach=true #分離容器

  

啟動grafana

docker run -d -p 3000:3000 \
--user=root \
--name=grafana \
-v /home/grafana-storage:/var/lib/grafana \
grafana/grafana
#--user=root  #以root使用者執行

  

  grafana模板地址https://grafana.com/grafana/dashboards

啟動prometheus

docker run  -d -p 9090:9090 \
--name prometheus \
-v /home/prometheus:/etc/prometheus \
-v /home/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml \
prom/prometheus \
--web.enable-lifecycle \
--config.file="/etc/prometheus/prometheus.yml"

#--web.enable-lifecycle							#熱載入引數,需要配合配置檔案--config.file使用,否則會報錯
#curl -X POST http://localhost:9090/-/reload	#熱載入prometheus配置檔案
#--config.file									#配置檔案路徑
#--storage.tsdb.path="/etc/prometheus/data"		#資料儲存路徑

  Prometheus配置grafana

[root@localhost prometheus]# vim prometheus.yml
global:             # 全域性設定,可以被覆蓋
  scrape_interval: 15s              # 抓取取樣資料的時間間隔,每15秒去被監控機上取樣,即資料採集頻率
  evaluation_interval: 15s          # 監控資料規則的評估頻率,比如設定檔案系統使用率>75%發出告警則每15秒執行一次該規則,進行檔案系統檢查

#告警管理
alerting:
    alertmanagers:
    - static_configs:                       #告警靜態目標配置
#      - targets: ['192.168.31.131:9093']    #告警ui地址

#告警規則
rule_files:
#- /etc/prometheus/rules/*.rules     #告警規則檔案路徑

scrape_configs:             # 抓取配置
#靜態發現
  - job_name: 'grafana'     #任務名 全域性唯一
    scrape_interval: 5s     # 抓取取樣資料的時間間隔
    static_configs:         #靜態目標配置
      - targets: ['192.168.31.131:3000']   #抓取地址,預設為/metrics
        labels:             #標籤
          instance: grafana
curl -X POST http://localhost:9090/-/reload		#熱載入prometheus配置檔案

  

啟動pushgateway

docker run -d \
--name pushgateway \
-p 9091:9091 \
prom/pushgateway

  推送exporter到pushgateway

curl http://localhost:9090/metrics | curl --data-binary @- http://192.168.31.158:9091/metrics/job/prometheus/instance/131-普羅米修斯
curl http://192.168.31.158:9104/metrics | curl --data-binary @- http://192.168.31.158:9091/metrics/job/mysql/instance/158-MYSQL

  

注:推送到pushgateway的指標不會顯示在prometheus的網頁介面上,只能通過PromQL查詢

  刪除指標

curl -X DELETE http://192.168.31.158:9091/metrics/job/mysql

  Prometheus配置pushgateway

[root@localhost prometheus]# vim prometheus.yml
scrape_configs:             # 抓取配置
#靜態發現
  - job_name: 'grafana'     #任務名 全域性唯一
    scrape_interval: 5s     # 抓取取樣資料的時間間隔
    static_configs:         #靜態目標配置
      - targets: ['192.168.31.131:3000']   #抓取地址,預設為/metrics
        labels:             #標籤
          instance: grafana
#pushgateway中轉
  - job_name: pushgateway
    static_configs:
      - targets: ['192.168.31.158:9091']
        labels:
          instance: pushgateway

  

curl -X POST http://localhost:9090/-/reload		#熱載入prometheus配置檔案

  

啟動告警管理alertmanager

docker run --name alertmanager -d -p 9093:9093 prom/alertmanager
mkdir -p /home/alertmanager
docker cp alertmanager:/etc/alertmanager/alertmanager.yml /home/alertmanager/
docker rm -f alertmanager

docker run -d --name alertmanager -p 9093:9093 \
-v /home/alertmanager:/etc/alertmanager \
prom/alertmanager

#--storage.path   資料儲存路徑
#--config.file    配置檔案路徑

  

  Prometheus配置alertmanager連線

[root@localhost prometheus]# cat rules/hoststats-alert.rules 
groups:
- name: hostStatsAlert
  rules:
  - alert: hostCpuUsageAlert
    expr: sum(avg without (cpu)(irate(node_cpu_seconds_total{mode!="idle"}[5m]))) by (instance) > 0.85
    for: 1m
    labels:
      severity: page
    annotations:
      summary: "Instance {{ $labels.instance }} CPU usage high"
      description: "{{ $labels.instance }} CPU usage above 85% (current value: {{ $value }})"
  - alert: hostMemUsageAlert
    expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes)/node_memory_MemTotal_bytes > 0.85
    for: 1m
    labels:
      severity: page
    annotations:
      summary: "Instance {{ $labels.instance }} MEM usage high"
      description: "{{ $labels.instance }} MEM usage above 85% (current value: {{ $value }})"

  

[root@localhost prometheus]# vim prometheus.yml
global:             # 全域性設定,可以被覆蓋
  scrape_interval: 15s              # 抓取取樣資料的時間間隔,每15秒去被監控機上取樣,即資料採集頻率
  evaluation_interval: 15s          # 監控資料規則的評估頻率,比如設定檔案系統使用率>75%發出告警則每15秒執行一次該規則,進行檔案系統檢查

#告警管理
alerting:
    alertmanagers:
    - static_configs:                       #告警靜態目標配置
      - targets: ['192.168.31.131:9093']    #告警ui地址

#告警規則
rule_files:
- /etc/prometheus/rules/*.rules     #告警規則檔案路徑

  

curl -X POST http://localhost:9090/-/reload		#熱載入prometheus配置檔案

  

配置基於檔案發現

[root@localhost config]# cat /home/prometheus/config/target.yml 
- targets: ['192.168.31.131:9090']          #prometheus的地址埠,監控Prometheus資訊
  labels:
    app: 'app1'
    env: 'game1'
    region: 'reg1'
- targets: ['192.168.31.158:9100']			#另外伺服器的node-exporter的地址埠,監控伺服器資訊
  labels:
    app: 'app2'
    env: 'game2'
    region: 'reg2'

  

[root@localhost prometheus]# cat /home/prometheus/prometheus.yml
global:             # 全域性設定,可以被覆蓋
  scrape_interval: 15s              # 抓取取樣資料的時間間隔,每15秒去被監控機上取樣,即資料採集頻率
  evaluation_interval: 15s          # 監控資料規則的評估頻率,比如設定檔案系統使用率>75%發出告警則每15秒執行一次該規則,進行檔案系統檢查

#告警管理
alerting:
    alertmanagers:
    - static_configs:                       #靜態目標配置
      - targets: ['192.168.31.131:9093']    #告警ui地址

#告警規則
rule_files:
- /etc/prometheus/rules/*.rules     #告警規則檔案路徑

scrape_configs:             # 抓取配置
#靜態發現
  - job_name: 'grafana'     #任務名 全域性唯一
    scrape_interval: 5s     # 抓取取樣資料的時間間隔
    static_configs:         #靜態目標配置
      - targets: ['192.168.31.131:3000']   #抓取地址,預設為/metrics
        labels:             #標籤
          instance: grafana
#pushgateway中轉
  - job_name: pushgateway
    static_configs:
      - targets: ['192.168.31.158:9091']
        labels:
          instance: pushgateway
#檔案發現
  - job_name: 'file_ds'     #任務名 全域性唯一
    file_sd_configs:        #基於檔案發現配置
    - files: ['/etc/prometheus/config/*.yml']        #配置檔案路徑,匹配config目錄下所有yml檔案
      refresh_interval: 5s  #每五秒掃描重新整理配置檔案

  

curl -X POST http://localhost:9090/-/reload		#熱載入prometheus配置檔案

  

配置基於服務發現

  安裝consul

wget https://releases.hashicorp.com/consul/1.6.1/consul_1.6.1_linux_amd64.zip
unzip consul_1.6.1_linux_amd64.zip
./consul agent -dev
或者
docker run --name consul -d -p 8500:8500 consul

  登出服務

curl -X PUT http://192.168.31.131:8500/v1/agent/service/deregister/node-exporter
#node-exporter就是"id": "node-exporter"

  註冊服務

#vim /home/prometheus/config/consul-1.json
{
"ID": "node-exporter",
"Name": "node-exporter-192.168.31.131",
"Tags": [
"node-exporter"
],
"Address": "192.168.31.131",
"Port": 9100,
"Meta": {
"app": "spring-boot",
"team": "appgroup",
"project": "bigdata"
},
"EnableTagOverride": false,
"Check": {
"HTTP": "http://192.168.31.131:9100/metrics",
"Interval": "10s"
},
"Weights": {
"Passing": 10,
"Warning": 1
}
}

# 更新註冊服務
#curl --request PUT --data @/home/prometheus/config/consul-1.json http://192.168.31.131:8500/v1/agent/service/register?replace-existing-checks=1

$ vim /home/prometheus/config/consul-2.json
{
"ID": "cadvisor-exporter",
"Name": "cadvisor-exporter-192.168.31.131",
"Tags": [
"cadvisor-exporter"
],
"Address": "192.168.31.131",
"Port": 9080,
"Meta": {
"app": "docker",
"team": "cloudgroup",
"project": "docker-service"
},
"EnableTagOverride": false,
"Check": {
"HTTP": "http://192.168.31.131:9080/metrics",
"Interval": "10s"
},
"Weights": {
"Passing": 10,
"Warning": 1
}
}

# 註冊服務
# curl --request PUT --data @/home/prometheus/config/consul-2.json http://192.168.31.131:8500/v1/agent/service/register?replace-existing-checks=1

  更新Prometheus.yml

[root@localhost prometheus]# vim /home/prometheus/prometheus.yml
#檔案發現
  - job_name: 'file_ds'     #任務名 全域性唯一
    file_sd_configs:        #基於檔案發現配置
    - files: ['/etc/prometheus/config/*.yml']        #配置檔案路徑
      refresh_interval: 5s  #每五秒掃描重新整理配置檔案
#服務發現
  - job_name: 'consul-node-exporter'
    consul_sd_configs:							#基於服務發現型別
      - server: '192.168.31.131:8500'			#服務地址
        services: []  
    relabel_configs:
      - source_labels: [__meta_consul_tags]		#注意是兩個底線"__"
        regex: .*node-exporter.*				#匹配__meta_consul_tags中值包含node-exporter的
        action: keep							#keep丟棄未匹配到regex中內容的資料
      - regex: __meta_consul_service_metadata_(.+)	#獲取__meta_consul_service_metadata_的值(標籤)
        action: labelmap							#將獲取的值作為新的標籤

  - job_name: 'consul-cadvisor-exporter'
    consul_sd_configs:
      - server: '192.168.31.131:8500'
        services: []
    relabel_configs:
      - source_labels: [__meta_consul_tags]
        regex: .*cadvisor-exporter.*
        action: keep
      - regex: __meta_consul_service_metadata_(.+)
        action: labelmap

 

curl -X POST http://localhost:9090/-/reload		#熱載入prometheus配置檔案