k8s搭建prometheus alertmanager
阿新 • • 發佈:2018-12-21
---
# NodePort Service in front of the prometheus pod (selector app=prometheus).
# It publishes two ports: "web" (9099) and "alert" (9093).
kind: Service
apiVersion: v1
metadata:
  labels:
    app: prometheus
  name: prometheus
  namespace: ns-monitor
spec:
  type: NodePort
  ports:
    - name: web
      port: 9099
      targetPort: 9099
      nodePort: 30003
    - name: alert
      port: 9093
      targetPort: 9093
      nodePort: 30004
  selector:
    app: prometheus
---
# Ingress routing every path of prome.baassit.test.com to the Service's
# 9099 port.
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
  name: prometheus-ingress
  namespace: ns-monitor
spec:
  rules:
    - host: prome.baassit.test.com
      http:
        paths:
          - path: /
            backend:
              serviceName: prometheus
              servicePort: 9099
---
# Single-replica Deployment running two containers in one pod:
#   * prometheus   - listens on :9099, TSDB stored on the PVC below
#   * alertmanager - exposes 9093, config read from a file on the node
apiVersion: apps/v1beta2
kind: Deployment
metadata:
  labels:
    name: prometheus-deployment
  name: prometheus
  namespace: ns-monitor
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus
  template:
    metadata:
      labels:
        app: prometheus
    spec:
      # The pod is pinned to one node; the alert-volume below is a hostPath,
      # so the file it points at only has to exist on that node.
      nodeSelector:
        node: kube-node1
      containers:
        - name: prometheus
          image: 10.47.204.25/prom/prometheus:v2.3.1
          imagePullPolicy: Always
          command:
            - "/bin/prometheus"
          args:
            - "--config.file=/etc/prometheus/prometheus.yml"
            - "--web.listen-address=:9099"
            - "--web.external-url=http://prome.baassit.test.com"
            - "--storage.tsdb.path=/prometheus"
          ports:
            - containerPort: 9099
              protocol: TCP
          volumeMounts:
            - mountPath: "/prometheus"
              name: data
            - mountPath: "/etc/prometheus"
              name: config-volume
            - mountPath: "/etc/rules"
              name: rules-volume
        - name: alertmanager
          image: 10.47.204.25/prom/alertmanager:0.15.2
          imagePullPolicy: IfNotPresent
          args:
            - "--config.file=/etc/alertmanager/alertmanager.yml"
          ports:
            - name: alertmanager
              containerPort: 9093
          volumeMounts:
            # A single file (not a directory) is mounted over the config path.
            - name: alert-volume
              mountPath: /etc/alertmanager/alertmanager.yml
      serviceAccountName: prometheus
      imagePullSecrets:
        - name: regsecret
      # NOTE(review): both containers run as root (uid 0); presumably needed
      # for write access to the data volume - confirm.
      securityContext:
        runAsUser: 0
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: prometheus-pvc-001
        - name: config-volume
          configMap:
            name: prometheus-config
        - name: rules-volume
          configMap:
            name: rules-config
        - name: alert-volume
          hostPath:
            path: /opt/prome/alertmanager.yml
            # Require an existing regular file. Without a type, a missing
            # path is created as an empty directory on the node and
            # alertmanager then cannot read its config file.
            type: File
---
# Claim backing the Prometheus TSDB directory (/prometheus).
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: prometheus-pvc-001
  namespace: ns-monitor
  labels:
    app: prometheus-pvc
spec:
  accessModes:
    - "ReadWriteOnce"
  resources:
    requests:
      storage: "5Gi"
  storageClassName: "glusterfs"
---
# Cluster-wide read-only access for the objects listed below, plus GET on
# the /metrics non-resource URL.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
rules:
  - apiGroups: [""]
    resources:
      - nodes
      - nodes/proxy
      - services
      - endpoints
      - pods
    verbs: ["get", "list", "watch"]
  - apiGroups:
      - extensions
    resources:
      - ingresses
    verbs: ["get", "list", "watch"]
  - nonResourceURLs: ["/metrics"]
    verbs: ["get"]
---
# Service account bound to the ClusterRole above.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus
  namespace: ns-monitor
---
# Grants the ClusterRole to the ns-monitor/prometheus service account.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
  - kind: ServiceAccount
    name: prometheus
    namespace: ns-monitor
---
# ConfigMap holding the Prometheus alerting rules under the key rules.yml.
apiVersion: v1
kind: ConfigMap
metadata:
  name: rules-config
  namespace: ns-monitor
data:
  rules.yml: |
    groups:
      - name: container-rule
        rules:
          # Fires when the rootfs device is more than 80% used for 1 minute.
          - alert: ContainerDisksystemUsage
            expr: (node_filesystem_size{device="rootfs"} - node_filesystem_free{device="rootfs"}) / node_filesystem_size{device="rootfs"} * 100 > 80
            for: 1m
            labels:
              team: container
            annotations:
              summary: "{{$labels.instance}}: {{$labels.name}}High Filesystem usage detected"
              description: "{{$labels.instance}}: {{$labels.name}}容器Disk使用率超過 80% (current value is: {{ $value }})"
          # Fires when memory usage exceeds 40% of the container's limit.
          # The description states the same figure as the expression.
          # NOTE(review): 40 looks like a test threshold - if 80 was intended,
          # change the expression and the description together.
          - alert: ContainerMemoryUsage
            expr: container_memory_usage_bytes{container_name!="",namespace!="",container_name!="POD"}/container_spec_memory_limit_bytes{container_name!="",namespace!="",container_name!="POD"}*100 > 40
            for: 1m
            labels:
              team: container
            annotations:
              summary: "{{$labels.instance}}: {{$labels.name}}High Memory usage detected"
              description: "{{$labels.instance}}: {{$labels.name}}容器記憶體使用率超過40% (current value is: {{ $value }})"
          # Fires when the 1m CPU rate exceeds 10% of the CPU quota; the quota
          # is divided by 100000 in the expression.
          # NOTE(review): 10 looks like a test threshold - if 80 was intended,
          # change the expression and the description together.
          - alert: ContainerCPUUsage
            expr: (rate(container_cpu_usage_seconds_total{container_name!="",namespace!="",container_name!="POD"}[1m])) / (container_spec_cpu_quota{container_name!="",namespace!="",container_name!="POD"}/100000)*100 > 10
            for: 1m
            labels:
              team: container
            annotations:
              summary: "{{$labels.instance}}: {{$labels.name}}High CPU usage detected"
              description: "{{$labels.instance}}: {{$labels.name}}容器Cpu使用率超過10% (current value is: {{ $value }})"
---
# Alertmanager configuration.
# NOTE(review): presumably the file stored on the node as
# /opt/prome/alertmanager.yml and mounted into the alertmanager container -
# confirm.

# No global overrides; written as an explicit empty mapping.
global: {}

route:
  group_by: ['alertname']
  group_wait: 10s
  group_interval: 10m
  repeat_interval: 5h
  # Default receiver for alerts that match none of the child routes.
  # NOTE(review): 'test' has no notification configs, so such alerts are not
  # delivered anywhere - confirm that is intended.
  receiver: test
  routes:
    - match:
        name: cpu
      receiver: cpu
    - match:
        name: memory
      receiver: memory
    - match:
        mountpoint: /opt
      receiver: opt
    - match:
        mountpoint: /var
      receiver: var

# Every non-default receiver posts to the same webhook URL and also sends
# resolved notifications.
receivers:
  - name: 'test'
  - name: 'cpu'
    webhook_configs:
      - url: 'http://172.30.88.2:28889'
        send_resolved: true
  - name: 'memory'
    webhook_configs:
      - url: 'http://172.30.88.2:28889'
        send_resolved: true
  - name: 'opt'
    webhook_configs:
      - url: 'http://172.30.88.2:28889'
        send_resolved: true
  - name: 'var'
    webhook_configs:
      - url: 'http://172.30.88.2:28889'
        send_resolved: true

# A firing 'critical' alert mutes 'warning' alerts that share the same
# alertname, dev and instance labels.
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']
---
# ConfigMap holding the Prometheus server configuration under the key
# prometheus.yml.
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: ns-monitor
data:
  prometheus.yml: |
    global:
      scrape_interval: 15s
      evaluation_interval: 15s

    alerting:
      alertmanagers:
        - static_configs:
            # Alertmanager runs as a second container in the same pod, so it
            # is reached over the pod-local loopback instead of a hard-coded
            # IP (previously 172.30.88.2:9093).
            # NOTE(review): if an Alertmanager outside this pod was intended,
            # put its stable DNS name here instead.
            - targets: ["localhost:9093"]
            # - alertmanager:9093

    rule_files:
      - /etc/rules/rules.yml

    scrape_configs:
      # Keeps only the default/kubernetes service's "https" endpoint port.
      - job_name: 'kubernetes-apiservers'
        kubernetes_sd_configs:
          - role: endpoints
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        relabel_configs:
          - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
            action: keep
            regex: default;kubernetes;https

      # Node metrics, scraped via kubernetes.default.svc:443 using the
      # per-node proxy path.
      - job_name: 'kubernetes-nodes'
        kubernetes_sd_configs:
          - role: node
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - target_label: __address__
            replacement: kubernetes.default.svc:443
          - source_labels: [__meta_kubernetes_node_name]
            regex: (.+)
            target_label: __metrics_path__
            replacement: /api/v1/nodes/${1}/proxy/metrics

      # Same as kubernetes-nodes, but with the /metrics/cadvisor proxy path.
      - job_name: 'kubernetes-cadvisor'
        kubernetes_sd_configs:
          - role: node
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - target_label: __address__
            replacement: kubernetes.default.svc:443
          - source_labels: [__meta_kubernetes_node_name]
            regex: (.+)
            target_label: __metrics_path__
            replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor

      # Endpoints of services annotated prometheus.io/scrape=true; scheme,
      # path and port can be overridden by further prometheus.io/*
      # annotations.
      - job_name: 'kubernetes-service-endpoints'
        kubernetes_sd_configs:
          - role: endpoints
        relabel_configs:
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
            action: keep
            regex: "true"
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
            action: replace
            target_label: __scheme__
            regex: (https?)
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
            action: replace
            target_label: __address__
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_service_name]
            action: replace
            target_label: kubernetes_name

      # Services annotated prometheus.io/probe=true: the service address is
      # passed as the "target" parameter and the scrape itself goes to the
      # blackbox exporter address set below.
      - job_name: 'kubernetes-services'
        kubernetes_sd_configs:
          - role: service
        metrics_path: /probe
        params:
          module: [http_2xx]
        relabel_configs:
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
            action: keep
            regex: "true"
          - source_labels: [__address__]
            target_label: __param_target
          - target_label: __address__
            replacement: blackbox-exporter.example.com:9115
          - source_labels: [__param_target]
            target_label: instance
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_service_name]
            target_label: kubernetes_name

      # Ingresses annotated prometheus.io/probe=true. metrics_path and
      # params mirror the kubernetes-services job: the relabeling below
      # points __address__ at the blackbox exporter, and without /probe the
      # scrape would hit that exporter's default metrics path instead of
      # probing the ingress URL.
      - job_name: 'kubernetes-ingresses'
        kubernetes_sd_configs:
          - role: ingress
        metrics_path: /probe
        params:
          module: [http_2xx]
        relabel_configs:
          - source_labels: [__meta_kubernetes_ingress_annotation_prometheus_io_probe]
            action: keep
            regex: "true"
          - source_labels: [__meta_kubernetes_ingress_scheme,__address__,__meta_kubernetes_ingress_path]
            regex: (.+);(.+);(.+)
            replacement: ${1}://${2}${3}
            target_label: __param_target
          - target_label: __address__
            replacement: blackbox-exporter.example.com:9115
          - source_labels: [__param_target]
            target_label: instance
          - action: labelmap
            regex: __meta_kubernetes_ingress_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_ingress_name]
            target_label: kubernetes_name

      # Pods annotated prometheus.io/scrape=true; path and port can be
      # overridden by prometheus.io/path and prometheus.io/port.
      - job_name: 'kubernetes-pods'
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
            action: keep
            regex: "true"
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
            action: replace
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_pod_name]
            action: replace
            target_label: kubernetes_pod_name

      # Pulls the series selected by match[] from the /federate endpoint of
      # the targets listed in the file_sd file.
      # NOTE(review): /data is not one of the volume mounts of the
      # prometheus container shown in this file - confirm the file exists
      # inside the container.
      - job_name: 'federate'
        scrape_interval: 15s
        honor_labels: true
        metrics_path: '/federate'
        params:
          'match[]':
            - '{job=~"prometheus.*"}'
            - '{job="SNBCADM_NJXG_PST_10.244.160.57_DOCKER"}'
        file_sd_configs:
          - files:
              - /data/node-discovery.json

      # Consul services whose tags contain "prometheus-target".
      - job_name: 'consul-prometheus'
        consul_sd_configs:
          - server: '10.47.204.11:8500'
            services: []
        relabel_configs:
          - source_labels: [__meta_consul_tags]
            regex: .*prometheus-target.*
            action: keep