︿
Top

Install Prometheus on Kubernetes feat Grafana


上一篇手動安裝 WordPress 後,本篇我們一樣採用手動編寫 YAML 文檔安裝 Prometheus 監控解決方案 for Kubernetes,透過這樣一步一腳印的方式建立起的套裝服務讓我們可以更扎實的理解 Kubernetes 裡應用服務的建置與運作配置邏輯。





實驗環境規格

主機安裝的作業系統皆為 CentOS 7.9x64
Kubernetes 建置流程可參閱此篇文章
nfs 存儲建置可參閱此篇文章





配置 nfs 對應的 PV 空間

建立目錄位置 for PV

[root@nfs ~]# mkdir -p /tmp/nfs/003 /tmp/nfs/004





安裝 Node exporter

首先替這套監控方案建立一個專屬的 namespace,透過 tolerations 容忍度參數與 DaemonSet 控制器給叢集中的所有 nodes 上分別部署一個 node exporter Pod,鏡像版本的選擇(Nov 30, 2022)以接近你 Kubernetes 的發行版本為主

# Dedicated namespace for the whole monitoring stack.
apiVersion: v1
kind: Namespace
metadata:
  name: monitor-sa

---
# Run one node-exporter pod on every node (masters included via tolerations)
# so Prometheus can scrape per-host metrics on port 9100.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-exporter
  namespace: monitor-sa
  labels:
    name: node-exporter
spec:
  selector:
    matchLabels:
      name: node-exporter
  template:
    metadata:
      labels:
        name: node-exporter
    spec:
      hostPID: true
      hostIPC: true
      hostNetwork: true                                   # share the host's network and process namespaces
      containers:
      - name: node-exporter
        image: prom/node-exporter:v1.4.1                  # released Nov 30, 2022; keep close to your k8s version
        imagePullPolicy: IfNotPresent
        ports:
        - containerPort: 9100                             # exporter listens on 9100
        resources:
          requests:
            cpu: 0.15
        securityContext:
          privileged: true                                # privileged mode, needed to read host-level metrics
        args:
        - --path.procfs
        - /host/proc
        - --path.sysfs
        - /host/sys
        # NOTE: this flag is deprecated in favour of
        # --collector.filesystem.mount-points-exclude; both are accepted by v1.4.1.
        - --collector.filesystem.ignored-mount-points
        # No embedded double quotes here: they would become part of the regex
        # and silently break the mount-point exclusion.
        - '^/(sys|proc|dev|host|etc)($|/)'
        volumeMounts:                                     # mount host directories read-only to collect host metrics
        - name: dev
          mountPath: /host/dev
          readOnly: true
        - name: proc
          mountPath: /host/proc
          readOnly: true
        - name: sys
          mountPath: /host/sys
          readOnly: true
        - name: rootfs
          mountPath: /rootfs
          readOnly: true
      tolerations:                                        # tolerate the default taints so pods are scheduled on masters too
      - key: "node-role.kubernetes.io/master"
        operator: "Exists"
        effect: "NoSchedule"
      - key: "node-role.kubernetes.io/control-plane"      # taint name used by Kubernetes >= 1.24
        operator: "Exists"
        effect: "NoSchedule"
      volumes:                                            # host paths backing the mounts above
        - name: proc
          hostPath:
            path: /proc
        - name: dev
          hostPath:
            path: /dev
        - name: sys
          hostPath:
            path: /sys
        - name: rootfs
          hostPath:
            path: /
[root@k8s-master prometheus]# kubectl apply -f ./node-exporter.yaml




安裝 Prometheus

建立 serviceAccount for Prometheus,使其對 Kubernetes 的各項資源擁有足夠的存取權限

# ServiceAccount the Prometheus server pod will run as.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: monitor
  namespace: monitor-sa

---

# Grant the ServiceAccount cluster-wide access via the built-in cluster-admin
# role so the kubernetes_sd_configs service discovery can list/watch resources.
# NOTE(review): cluster-admin is far broader than Prometheus needs; a dedicated
# ClusterRole with get/list/watch on nodes, services, endpoints and pods would
# be least-privilege.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  # ClusterRoleBindings are cluster-scoped; a metadata.namespace is ignored.
  name: monitor-clusterrolebinding
subjects:
- kind: ServiceAccount
  name: monitor
  namespace: monitor-sa
roleRef:
  apiGroup: rbac.authorization.k8s.io   # required field; apply fails without it
  kind: ClusterRole
  name: cluster-admin
[root@k8s-master prometheus]# kubectl apply -f ./service-account.yaml

配置 PVC 持久化存儲空間 for Prometheus

# 10Gi NFS-backed PersistentVolume for the Prometheus TSDB
# (the /tmp/nfs/003 directory was created on the NFS server earlier).
apiVersion: v1
kind: PersistentVolume
metadata:
  name: 10g-disk003
spec:
  capacity:
    storage: 10Gi
  accessModes:
    - ReadWriteMany
  persistentVolumeReclaimPolicy: Retain    # keep the data when the claim is deleted
  storageClassName: ""                     # statically provisioned, no StorageClass
  nfs:
    path: /tmp/nfs/003
    server: 192.168.200.158

---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: prometheus-pvc
  namespace: monitor-sa
spec:
  # An empty storageClassName pins the claim to statically provisioned PVs;
  # otherwise a cluster default StorageClass could dynamically provision a
  # volume instead of binding to 10g-disk003 above.
  storageClassName: ""
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 10Gi
[root@k8s-master prometheus]# kubectl apply -f ./prometheus-pvc.yaml

透過 configMap 配置 prometheus.yml

# Prometheus configuration, mounted into the server pod at
# /etc/prometheus/prometheus.yml by the Deployment.
#
# NOTE(review): everything under "prometheus.yml: |" is the literal file
# content Prometheus itself reads (including its inline #-comments, which are
# in Chinese) — it is string data of this ConfigMap, not manifest comments.
# The tls_config/bearer_token_file entries point at the pod's mounted
# ServiceAccount credentials, which is why the Deployment sets
# serviceAccountName: monitor.
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: monitor-sa
data:

  prometheus.yml: |
    global:                     #全局配置
      scrape_interval:     15s  #拉取數據頻率
      scrape_timeout:      10s  #拉取超時時間
      evaluation_interval: 30s  #執行規則頻率(這個值要大於拉取頻率,否則會造成發生一個故障而產生多次告警)
    scrape_configs:             #拉取配置(有靜態配置和服務發現兩種)

    - job_name: 'kubernetes-apiservers'  #一個job為一個拉取任務
      kubernetes_sd_configs:             #k8s的服務發現
      - role: endpoints                  #從endpoints列表中發現所有targets
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
      - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
        action: keep
        regex: default;kubernetes;https

    - job_name: 'kubernetes-nodes'
      kubernetes_sd_configs:           #k8s的服務發現
      - role: node                     #使用kubelet提供的http端口發現node
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:                 #重新標記標籤
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)      #匹配到該表達式的標籤會保留
      - target_label: __address__
        replacement: kubernetes.default.svc:443
      - source_labels: [__meta_kubernetes_node_name]  #原始標籤,匹配地址
        regex: (.+)
        target_label: __metrics_path__
        replacement: /api/v1/nodes/${1}/proxy/metrics

    - job_name: 'kubernetes-cadvisor'   #抓取cAdvisor数据,是获取 kubelet 上/metrics/cadvisor 接口数据来获取容器的资源使用情况
      kubernetes_sd_configs:
      - role: node
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)
      - target_label: __address__
        replacement: kubernetes.default.svc:443
      - source_labels: [__meta_kubernetes_node_name]
        regex: (.+)
        target_label: __metrics_path__
        replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor

    - job_name: 'kubernetes-service-endpoints'
      kubernetes_sd_configs:
      - role: endpoints
      relabel_configs:
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
        action: keep
        regex: true
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
        action: replace
        target_label: __scheme__
        regex: (https?)
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
        action: replace
        target_label: __metrics_path__
        regex: (.+)
      - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
        action: replace
        target_label: __address__
        regex: ([^:]+)(?::\d+)?;(\d+)
        replacement: $1:$2
      - action: labelmap
        regex: __meta_kubernetes_service_label_(.+)
      - source_labels: [__meta_kubernetes_namespace]
        action: replace
        target_label: kubernetes_namespace
      - source_labels: [__meta_kubernetes_service_name]
        action: replace
        target_label: kubernetes_name

    - job_name: 'kubernetes-services'
      kubernetes_sd_configs:
      - role: service
      metrics_path: /probe
      params:
        module: [http_2xx]
      relabel_configs:
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
        action: keep
        regex: true
      - source_labels: [__address__]
        target_label: __param_target
      - target_label: __address__
        replacement: blackbox-exporter.example.com:9115
      - source_labels: [__param_target]
        target_label: instance
      - action: labelmap
        regex: __meta_kubernetes_service_label_(.+)
      - source_labels: [__meta_kubernetes_namespace]
        target_label: kubernetes_namespace
      - source_labels: [__meta_kubernetes_service_name]
        target_label: kubernetes_name

    - job_name: 'kubernetes-ingresses'
      kubernetes_sd_configs:
      - role: ingress
      relabel_configs:
      - source_labels: [__meta_kubernetes_ingress_annotation_prometheus_io_probe]
        action: keep
        regex: true
      - source_labels: [__meta_kubernetes_ingress_scheme,__address__,__meta_kubernetes_ingress_path]
        regex: (.+);(.+);(.+)
        replacement: ${1}://${2}${3}
        target_label: __param_target
      - target_label: __address__
        replacement: blackbox-exporter.example.com:9115
      - source_labels: [__param_target]
        target_label: instance
      - action: labelmap
        regex: __meta_kubernetes_ingress_label_(.+)
      - source_labels: [__meta_kubernetes_namespace]
        target_label: kubernetes_namespace
      - source_labels: [__meta_kubernetes_ingress_name]
        target_label: kubernetes_name

    - job_name: 'kubernetes-pods'
      kubernetes_sd_configs:
      - role: pod
      relabel_configs:
      - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
        action: keep
        regex: true
      - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
        action: replace
        target_label: __metrics_path__
        regex: (.+)
      - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
        action: replace
        regex: ([^:]+)(?::\d+)?;(\d+)
        replacement: $1:$2
        target_label: __address__
      - action: labelmap
        regex: __meta_kubernetes_pod_label_(.+)
      - source_labels: [__meta_kubernetes_namespace]
        action: replace
        target_label: kubernetes_namespace
      - source_labels: [__meta_kubernetes_pod_name]
        action: replace
        target_label: kubernetes_pod_name
[root@k8s-master prometheus]# kubectl apply -f ./prometheus-cfg.yaml

部署 Prometheus,鏡像版本的選擇(Nov 17, 2022)以接近你 Kubernetes 的發行版本為主

# Single-replica Prometheus server. Configuration comes from the
# prometheus-config ConfigMap; TSDB data persists on the NFS-backed
# prometheus-pvc claim.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus-server
  namespace: monitor-sa
  labels:
    app: prometheus
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus
      component: server
  template:
    metadata:
      labels:
        app: prometheus
        component: server
      annotations:
        prometheus.io/scrape: 'false'                             # don't self-scrape via service discovery; other pods opt in by setting 'true'
    spec:
      # nodeName: k8s-node01
      serviceAccountName: monitor                                 # grants the API access the scrape configs rely on
      containers:
      - name: prometheus                                          # container name
        image: prom/prometheus:v2.40.2                            # keep close to your k8s release
        imagePullPolicy: IfNotPresent
        command:                                                  # command run at container start
          - prometheus
          - --config.file=/etc/prometheus/prometheus.yml
          - --storage.tsdb.path=/prometheus                       # TSDB data directory (backed by the PVC below)
          - --storage.tsdb.retention.time=720h                    # data retention; the bare --storage.tsdb.retention flag is deprecated
          - --web.enable-lifecycle                                # allow hot reload via HTTP POST /-/reload
        ports:                                                    # exposed container port
        - containerPort: 9090
          protocol: TCP
        volumeMounts:                                             # volumes mounted into the container
        - mountPath: /etc/prometheus                              # configuration from the ConfigMap
          name: prometheus-config
        - mountPath: /prometheus/                                 # TSDB storage
          name: prometheus-storage-volume
      # NOTE(review): the image runs as user "nobody" (uid 65534); make sure
      # the exported NFS directory is writable by that uid or startup will
      # fail with a permission error -- TODO confirm on the NFS server.
      volumes:                                                    # volume definitions
        - name: prometheus-config
          configMap:                                              # sourced from the ConfigMap of the same name
            name: prometheus-config
        - name: prometheus-storage-volume
          persistentVolumeClaim:
            claimName: prometheus-pvc
[root@k8s-master prometheus]# kubectl apply -f ./prometheus-deploy.yaml

暴露 Prometheus 端口

# Expose the Prometheus web UI/API on every node at TCP 30090.
apiVersion: v1
kind: Service
metadata:
  name: prometheus-service
  namespace: monitor-sa
  labels:
    app: prometheus
spec:
  type: NodePort
  selector:
    component: server
    app: prometheus
  ports:
  - protocol: TCP
    port: 9090
    targetPort: 9090
    nodePort: 30090 # externally exposed node port; drop this line to keep it cluster-internal
[root@k8s-master prometheus]# kubectl apply -f ./prometheus-svc.yaml




安裝 Grafana

配置 PVC 持久化存儲空間 for Grafana

# 10Gi NFS-backed PersistentVolume for Grafana's data directory
# (the /tmp/nfs/004 directory was created on the NFS server earlier).
apiVersion: v1
kind: PersistentVolume
metadata:
  name: 10g-disk004
spec:
  capacity:
    storage: 10Gi
  accessModes:
    - ReadWriteMany
  persistentVolumeReclaimPolicy: Retain    # keep the data when the claim is deleted
  storageClassName: ""                     # statically provisioned, no StorageClass
  nfs:
    path: /tmp/nfs/004
    server: 192.168.200.158

---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: grafana-pvc
  namespace: monitor-sa
spec:
  # An empty storageClassName pins the claim to statically provisioned PVs;
  # otherwise a cluster default StorageClass could dynamically provision a
  # volume instead of binding to 10g-disk004 above.
  storageClassName: ""
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 10Gi
[root@k8s-master prometheus]# kubectl apply -f ./grafana-pvc.yaml

由於 Grafana 只是 UI 展示端,並不容易與 Kubernetes 發生版本衝突的問題,因此我們直接採用最新版本的鏡像安裝部署 Grafana

# Single-replica Grafana UI. Dashboards and settings live in /var/lib/grafana,
# persisted on the NFS-backed grafana-pvc claim so they survive pod restarts.
# (The INFLUXDB_HOST env var from the old heapster-grafana example has been
# removed: the plain grafana/grafana image does not read it.)
apiVersion: apps/v1
kind: Deployment
metadata:
  name: grafana-server
  namespace: monitor-sa
spec:
  replicas: 1
  selector:
    matchLabels:
      task: monitoring
      app: grafana
  template:
    metadata:
      labels:
        task: monitoring
        app: grafana
    spec:
      containers:
      - name: grafana
        image: grafana/grafana:12.0.4
        imagePullPolicy: IfNotPresent
        ports:
        - containerPort: 3000
          protocol: TCP
        volumeMounts:
        - mountPath: /etc/ssl/certs
          name: ca-certificates
          readOnly: true
        # NOTE(review): this emptyDir shadows the image's entire /var and is
        # carried over from the upstream add-on example -- confirm it is
        # still wanted.
        - mountPath: /var
          name: grafana-storage
        - mountPath: /var/lib/grafana/    # Grafana data dir, persisted on NFS
          name: lib
        env:
        - name: GF_SERVER_HTTP_PORT
          value: "3000"
          # The following env variables are required to make Grafana accessible via
          # the kubernetes api-server proxy. On production clusters, we recommend
          # removing these env variables, setup auth for grafana, and expose the grafana
          # service using a LoadBalancer or a public IP.
        - name: GF_AUTH_BASIC_ENABLED
          value: "false"
        - name: GF_AUTH_ANONYMOUS_ENABLED
          value: "true"
        - name: GF_AUTH_ANONYMOUS_ORG_ROLE
          value: Admin
        - name: GF_SERVER_ROOT_URL
          # If you're only using the API Server proxy, set this value instead:
          # value: /api/v1/namespaces/kube-system/services/monitoring-grafana/proxy
          value: /
      # NOTE(review): grafana/grafana runs as uid 472; make sure the exported
      # NFS directory is writable by that uid -- TODO confirm on the NFS server.
      volumes:
      - name: ca-certificates
        hostPath:
          path: /etc/ssl/certs
      - name: grafana-storage
        emptyDir: {}
      - name: lib
        persistentVolumeClaim:
          claimName: grafana-pvc
[root@k8s-master prometheus]# kubectl apply -f ./grafana-deploy.yaml

暴露 TCP 30091 端口給 Grafana

# NodePort Service exposing the Grafana UI on every node at TCP 30091.
apiVersion: v1
kind: Service
metadata:
  name: grafana-service
  namespace: monitor-sa
  labels:
    # For use as a Cluster add-on (https://github.com/kubernetes/kubernetes/tree/master/cluster/addons)
    # If you are NOT using this as an addon, you should comment out this line.
    kubernetes.io/cluster-service: 'true'
    kubernetes.io/name: monitoring-grafana
spec:
  # In a production setup, we recommend accessing Grafana through an external
  # LoadBalancer or a public IP instead of a raw NodePort.
  type: NodePort
  selector:
    app: grafana
  ports:
  - port: 3000
    targetPort: 3000
    nodePort: 30091 # externally exposed node port
[root@k8s-master prometheus]# kubectl apply -f ./grafana-svc.yaml




透過 Prometheus 觀測 Kubernetes 的監控資料

瀏覽器輸入 http://192.168.200.161:30091 登入 Grafana 頁面添加 Prometheus 的 data source,Prometheus server URL 位址填入我們上面建立的 Prometheus service name+命名空間+元件名+端口號


前往 Grafana dashboards 找一個你喜歡的 Dashboard 模板,本例我們盲抽到 Kubernetes cluster monitoring (via Prometheus),點選右下角 Download JSON 下載 json 模板


前往 Dashboards > New > Import 上傳 json 檔案以匯入此模板,下方選擇我們剛剛建立的 Prometheus data source






本文內容參閱以下連結:
Kubernetes 集群和應用監控方案的設計與實踐
Prometheus服務發現之kubernetes_sd_config

推薦閱讀:

tomy

來自台灣的系統工程師,一直熱衷於 Open source 相關技術的學習、建置、應用與分享。

  • Image
  • Image
  • Image
  • Image
  • Image

0 Comments:

張貼留言