# 1. 程式人生 > k8s搭建prometheus alertmanager
#
# k8s搭建prometheus alertmanager

---
# NodePort Service that exposes the two ports of the pods labelled
# app=prometheus on every cluster node.
apiVersion: v1
kind: Service
metadata:
  name: prometheus
  namespace: ns-monitor
  labels:
    app: prometheus
spec:
  type: NodePort
  # Traffic is routed to pods carrying this label.
  selector:
    app: prometheus
  ports:
    # Service/target port 9099, published on each node as 30003.
    - name: web
      port: 9099
      targetPort: 9099
      nodePort: 30003
    # Service/target port 9093, published on each node as 30004.
    - name: alert
      port: 9093
      targetPort: 9093
      nodePort: 30004
---
# Ingress that forwards every path of host prome.baassit.test.com to port 9099
# of the "prometheus" Service.
# NOTE(review): extensions/v1beta1 Ingress is only served by older clusters
# (it was removed in Kubernetes 1.22). Moving to networking.k8s.io/v1 also
# changes the backend schema, so confirm the cluster version before migrating.
kind: Ingress
apiVersion: extensions/v1beta1
metadata:
  namespace: ns-monitor
  name: prometheus-ingress
spec:
  rules:
    - host: prome.baassit.test.com
      http:
        paths:
          - path: /
            backend:
              serviceName: prometheus
              servicePort: 9099
---
# Single-replica Deployment that runs Prometheus and Alertmanager as two
# containers of the same pod.
# apps/v1 is used instead of apps/v1beta2, which Kubernetes stopped serving in
# 1.16; every field used below exists unchanged in apps/v1.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    name: prometheus-deployment
  name: prometheus
  namespace: ns-monitor
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus
  template:
    metadata:
      labels:
        app: prometheus
    spec:
      # Schedule only onto nodes labelled node=kube-node1; presumably this is
      # the node that holds the hostPath file mounted below -- confirm.
      nodeSelector:
        node: kube-node1
      containers:
      - name: prometheus
        image: 10.47.204.25/prom/prometheus:v2.3.1
        imagePullPolicy: Always
        command:
        - "/bin/prometheus"
        args:
        - "--config.file=/etc/prometheus/prometheus.yml"
        # Must stay in sync with containerPort below (9099).
        - "--web.listen-address=:9099"
        - "--web.external-url=http://prome.baassit.test.com"
        # Same path as the "data" volume mount, so the TSDB lands on the PVC.
        - "--storage.tsdb.path=/prometheus"
        ports:
        - containerPort: 9099
          protocol: TCP
        volumeMounts:
        - mountPath: "/prometheus"
          name: data
        - mountPath: "/etc/prometheus"
          name: config-volume
        - mountPath: "/etc/rules"
          name: rules-volume
      - name: alertmanager
        image: 10.47.204.25/prom/alertmanager:0.15.2
        imagePullPolicy: IfNotPresent
        args:
        - "--config.file=/etc/alertmanager/alertmanager.yml"
        ports:
        - name: alertmanager
          containerPort: 9093
        volumeMounts:
        # Single host file mounted over the config path; read-only because the
        # container runs as root and only needs to read its configuration.
        - name: alert-volume
          mountPath: /etc/alertmanager/alertmanager.yml
          readOnly: true
      serviceAccountName: prometheus
      imagePullSecrets:
      - name: regsecret
      # Pod-level security context: all containers run as UID 0 (root).
      securityContext:
        runAsUser: 0
      volumes:
      - name: data
        persistentVolumeClaim:
          claimName: prometheus-pvc-001
      - name: config-volume
        configMap:
          name: prometheus-config
      - name: rules-volume
        configMap:
          name: rules-config
      - name: alert-volume
        hostPath:
          path: /opt/prome/alertmanager.yml
          # Require an existing regular file: without a type, a missing path
          # is silently created as an empty directory and Alertmanager then
          # fails to read its config.
          type: File
---
# Claim for 5Gi of ReadWriteOnce storage from the "glusterfs" storage class;
# referenced by name from the Deployment's "data" volume.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  namespace: ns-monitor
  name: prometheus-pvc-001
  labels:
    app: prometheus-pvc
spec:
  storageClassName: glusterfs
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 5Gi
---
# Cluster-wide read-only permissions for the "prometheus" role: get/list/watch
# on the listed core resources and on ingresses, plus GET on the /metrics
# non-resource URL.
# The document separator above is required: without it this object is merged
# into the preceding document and apiVersion/kind/metadata become duplicate keys.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
rules:
# Core ("") API group.
- apiGroups: [""]
  resources:
  - nodes
  - nodes/proxy
  - services
  - endpoints
  - pods
  verbs: ["get", "list", "watch"]
# Ingress objects in the "extensions" API group.
- apiGroups:
  - extensions
  resources:
  - ingresses
  verbs: ["get", "list", "watch"]
- nonResourceURLs: ["/metrics"]
  verbs: ["get"]
---
# Service account named "prometheus" in ns-monitor; the Deployment's pod spec
# references it via serviceAccountName.
kind: ServiceAccount
apiVersion: v1
metadata:
  namespace: ns-monitor
  name: prometheus
---
# Grants the "prometheus" ClusterRole to the "prometheus" ServiceAccount in
# namespace ns-monitor.
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: prometheus
subjects:
  - kind: ServiceAccount
    namespace: ns-monitor
    name: prometheus
roleRef:
  kind: ClusterRole
  name: prometheus
  apiGroup: rbac.authorization.k8s.io
---
# ConfigMap holding the Prometheus alerting rules file (rules.yml). The
# Deployment mounts it at /etc/rules.
# The document separator above is required: without it this object is merged
# into the preceding document and apiVersion/kind/metadata become duplicate keys.
apiVersion: v1
kind: ConfigMap
metadata:
  name: rules-config
  namespace: ns-monitor
data:
  rules.yml: |
    groups:
    - name: container-rule
      rules:
      # Fires when the "rootfs" filesystem is more than 80% used for 1 minute.
      - alert: ContainerDisksystemUsage
        expr: (node_filesystem_size{device="rootfs"} - node_filesystem_free{device="rootfs"}) / node_filesystem_size{device="rootfs"} * 100 > 80
        for: 1m
        labels:
          team: container
        annotations:
          summary: "{{$labels.instance}}: {{$labels.name}} High Filesystem usage detected"
          description: "{{$labels.instance}}: {{$labels.name}}容器Disk使用率超過 80% (current value is: {{ $value }})"
      # Fires when memory usage exceeds 40% of the container's limit for 1 minute.
      # The "> 0" filter drops series whose limit is 0, so the ratio cannot
      # become +Inf and fire permanently.
      # NOTE(review): the message previously said 80% while the expression
      # uses 40; the text now follows the expression -- confirm the intended
      # threshold.
      - alert: ContainerMemoryUsage
        expr: container_memory_usage_bytes{container_name!="",namespace!="",container_name!="POD"} / (container_spec_memory_limit_bytes{container_name!="",namespace!="",container_name!="POD"} > 0) * 100 > 40
        for: 1m
        labels:
          team: container
        annotations:
          summary: "{{$labels.instance}}: {{$labels.name}} High Memory usage detected"
          description: "{{$labels.instance}}: {{$labels.name}}容器記憶體使用率超過40% (current value is: {{ $value }})"
      # Fires when the 1m CPU usage rate exceeds 10% of the container's CPU
      # quota (quota / 100000) for 1 minute.
      # NOTE(review): the message previously said 80% while the expression
      # uses 10; the text now follows the expression -- confirm the intended
      # threshold.
      - alert: ContainerCPUUsage
        expr: (rate(container_cpu_usage_seconds_total{container_name!="",namespace!="",container_name!="POD"}[1m])) / (container_spec_cpu_quota{container_name!="",namespace!="",container_name!="POD"}/100000)*100 > 10
        for: 1m
        labels:
          team: container
        annotations:
          summary: "{{$labels.instance}}: {{$labels.name}} High CPU usage detected"
          description: "{{$labels.instance}}: {{$labels.name}}容器Cpu使用率超過10% (current value is: {{ $value }})"
---
# Alertmanager configuration (route / receivers / inhibit_rules).
# This document is NOT a Kubernetes object and must not be passed to
# `kubectl apply`. Presumably it is the content of
# /opt/prome/alertmanager.yml, the host file that the Deployment mounts at
# /etc/alertmanager/alertmanager.yml -- confirm.
# The document separator above keeps it from being merged into the preceding
# ConfigMap document.
global:
route:
  group_by: ['alertname']
  group_wait: 10s
  group_interval: 10m
  repeat_interval: 5h
  # Default receiver for alerts that match none of the child routes.
  # NOTE(review): receiver 'test' defines no notification config, so alerts
  # that fall through to it are not delivered anywhere -- confirm intended.
  receiver: test
  # Child routes select on the alert labels "name" and "mountpoint".
  # NOTE(review): the rules in rules-config only add the label "team" --
  # verify these matchers can actually match.
  routes:
  - match:
      name: cpu
    receiver: cpu
  - match:
      name: memory
    receiver: memory
  - match:
      mountpoint: /opt
    receiver: opt
  - match:
      mountpoint: /var
    receiver: var

# All webhook receivers post to the same endpoint and also send
# "resolved" notifications.
receivers:
- name: 'test'
- name: 'cpu'
  webhook_configs:
  - url: 'http://172.30.88.2:28889'
    send_resolved: true
- name: 'memory'
  webhook_configs:
  - url: 'http://172.30.88.2:28889'
    send_resolved: true
- name: 'opt'
  webhook_configs:
  - url: 'http://172.30.88.2:28889'
    send_resolved: true
- name: 'var'
  webhook_configs:
  - url: 'http://172.30.88.2:28889'
    send_resolved: true
# Alerts with severity=critical mute severity=warning alerts that share the
# same alertname, dev and instance labels.
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']
---
# ConfigMap holding the main Prometheus configuration (prometheus.yml). The
# Deployment mounts it at /etc/prometheus.
# The document separator above is required: without it this object is merged
# into the preceding document and apiVersion/kind/metadata/data become
# duplicate keys.
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: ns-monitor
data:
  prometheus.yml: |
    global:
      scrape_interval:     15s
      evaluation_interval: 15s

    alerting:
      alertmanagers:
      - static_configs:
        # NOTE(review): hard-coded IP. The Deployment runs Alertmanager in the
        # same pod as Prometheus, so localhost:9093 may be a more robust
        # target -- confirm what 172.30.88.2 refers to.
        - targets: ["172.30.88.2:9093"]
          #- alertmanager:9093
    rule_files:
      - /etc/rules/rules.yml

    scrape_configs:
    # API server: keep only the default/kubernetes service's https endpoint.
    - job_name: 'kubernetes-apiservers'
      kubernetes_sd_configs:
      - role: endpoints
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
      - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
        action: keep
        regex: default;kubernetes;https

    # Node metrics, scraped through the API server proxy path.
    - job_name: 'kubernetes-nodes'
      kubernetes_sd_configs:
      - role: node
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)
      - target_label: __address__
        replacement: kubernetes.default.svc:443
      - source_labels: [__meta_kubernetes_node_name]
        regex: (.+)
        target_label: __metrics_path__
        replacement: /api/v1/nodes/${1}/proxy/metrics

    # cAdvisor metrics, scraped through the API server proxy path.
    - job_name: 'kubernetes-cadvisor'
      kubernetes_sd_configs:
      - role: node
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)
      - target_label: __address__
        replacement: kubernetes.default.svc:443
      - source_labels: [__meta_kubernetes_node_name]
        regex: (.+)
        target_label: __metrics_path__
        replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor

    # Service endpoints that opt in via prometheus.io/* service annotations.
    - job_name: 'kubernetes-service-endpoints'
      kubernetes_sd_configs:
      - role: endpoints
      relabel_configs:
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
        action: keep
        regex: true
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
        action: replace
        target_label: __scheme__
        regex: (https?)
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
        action: replace
        target_label: __metrics_path__
        regex: (.+)
      - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
        action: replace
        target_label: __address__
        regex: ([^:]+)(?::\d+)?;(\d+)
        replacement: $1:$2
      - action: labelmap
        regex: __meta_kubernetes_service_label_(.+)
      - source_labels: [__meta_kubernetes_namespace]
        action: replace
        target_label: kubernetes_namespace
      - source_labels: [__meta_kubernetes_service_name]
        action: replace
        target_label: kubernetes_name

    # Probe of services annotated prometheus.io/probe: the service address
    # becomes the "target" URL parameter and the scrape itself is sent to
    # /probe on the exporter address below.
    # NOTE(review): blackbox-exporter.example.com:9115 looks like a
    # placeholder -- confirm the real exporter address.
    - job_name: 'kubernetes-services'
      kubernetes_sd_configs:
      - role: service
      metrics_path: /probe
      params:
        module: [http_2xx]
      relabel_configs:
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
        action: keep
        regex: true
      - source_labels: [__address__]
        target_label: __param_target
      - target_label: __address__
        replacement: blackbox-exporter.example.com:9115
      - source_labels: [__param_target]
        target_label: instance
      - action: labelmap
        regex: __meta_kubernetes_service_label_(.+)
      - source_labels: [__meta_kubernetes_namespace]
        target_label: kubernetes_namespace
      - source_labels: [__meta_kubernetes_service_name]
        target_label: kubernetes_name

    # Probe of ingresses annotated prometheus.io/probe. metrics_path and
    # the module parameter are set like in the 'kubernetes-services' job:
    # the relabeling below redirects the scrape to the exporter address and
    # passes the ingress URL as "target", which only works against /probe,
    # not the default /metrics path.
    - job_name: 'kubernetes-ingresses'
      kubernetes_sd_configs:
      - role: ingress
      metrics_path: /probe
      params:
        module: [http_2xx]
      relabel_configs:
      - source_labels: [__meta_kubernetes_ingress_annotation_prometheus_io_probe]
        action: keep
        regex: true
      - source_labels: [__meta_kubernetes_ingress_scheme,__address__,__meta_kubernetes_ingress_path]
        regex: (.+);(.+);(.+)
        replacement: ${1}://${2}${3}
        target_label: __param_target
      - target_label: __address__
        replacement: blackbox-exporter.example.com:9115
      - source_labels: [__param_target]
        target_label: instance
      - action: labelmap
        regex: __meta_kubernetes_ingress_label_(.+)
      - source_labels: [__meta_kubernetes_namespace]
        target_label: kubernetes_namespace
      - source_labels: [__meta_kubernetes_ingress_name]
        target_label: kubernetes_name

    # Pods that opt in via prometheus.io/* pod annotations.
    - job_name: 'kubernetes-pods'
      kubernetes_sd_configs:
      - role: pod
      relabel_configs:
      - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
        action: keep
        regex: true
      - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
        action: replace
        target_label: __metrics_path__
        regex: (.+)
      - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
        action: replace
        regex: ([^:]+)(?::\d+)?;(\d+)
        replacement: $1:$2
        target_label: __address__
      - action: labelmap
        regex: __meta_kubernetes_pod_label_(.+)
      - source_labels: [__meta_kubernetes_namespace]
        action: replace
        target_label: kubernetes_namespace
      - source_labels: [__meta_kubernetes_pod_name]
        action: replace
        target_label: kubernetes_pod_name

    # Federation: pulls the selected series from the /federate endpoint of
    # the targets listed in the file_sd file.
    # NOTE(review): the Deployment mounts no volume at /data -- confirm
    # where /data/node-discovery.json is supposed to come from.
    - job_name: 'federate'
      scrape_interval: 15s
      honor_labels: true
      metrics_path: '/federate'
      params:
        'match[]':
          - '{job=~"prometheus.*"}'
          - '{job="SNBCADM_NJXG_PST_10.244.160.57_DOCKER"}'
      file_sd_configs:
        - files:
          - /data/node-discovery.json
    # Consul discovery: keep only services whose tags contain
    # "prometheus-target".
    - job_name: 'consul-prometheus'
      consul_sd_configs:
      - server: '10.47.204.11:8500'
        services: []
      relabel_configs:
      - source_labels: [__meta_consul_tags]
        regex: .*prometheus-target.*
        action: keep