繼上一篇手動安裝 WordPress 後,本篇我們一樣採用手動編寫 YAML 文檔安裝 Prometheus 監控解決方案 for Kubernetes,透過這樣一步一腳印的方式建立起的套裝服務,讓我們可以更扎實地理解 Kubernetes 裡應用服務的建置與運作配置邏輯。
實驗環境規格
主機安裝的作業系統皆為 CentOS 7.9x64
配置 nfs 對應的 PV 空間
建立目錄位置 for PV
[root@nfs ~]# mkdir -p /tmp/nfs/003 /tmp/nfs/004
安裝 Node exporter
首先替這套監控方案建立一個專屬的 namespace,透過 tolerations 容忍度參數與 DaemonSet 控制器在叢集中的所有 nodes 上分別部署一個 node exporter Pod,鏡像版本的選擇(Nov 30, 2022)以接近你 Kubernetes 的發行版本為主
# Dedicated namespace for the whole monitoring stack.
apiVersion: v1
kind: Namespace
metadata:
  name: monitor-sa
---
# DaemonSet: one node-exporter pod per cluster node (master included,
# via the toleration below).
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-exporter
  namespace: monitor-sa
  labels:
    name: node-exporter
spec:
  selector:
    matchLabels:
      name: node-exporter
  template:
    metadata:
      labels:
        name: node-exporter
    spec:
      # Share the host's PID/IPC namespaces and network stack so the
      # exporter reports real host-level metrics (and listens on the
      # node's own 9100 port).
      hostPID: true
      hostIPC: true
      hostNetwork: true
      containers:
      - name: node-exporter
        image: prom/node-exporter:v1.4.1  # released Nov 30, 2022
        imagePullPolicy: IfNotPresent
        ports:
        - containerPort: 9100  # node-exporter's default metrics port
        resources:
          requests:
            cpu: 0.15
        securityContext:
          privileged: true  # required to read the mounted host filesystems
        args:
        - --path.procfs
        - /host/proc
        - --path.sysfs
        - /host/sys
        # NOTE: the regex must NOT carry embedded double quotes — with
        # '"^/..."' the leading literal " makes the anchored pattern match
        # nothing, silently disabling the exclusion. This flag is also
        # deprecated since node-exporter 1.3 in favor of
        # --collector.filesystem.mount-points-exclude.
        - --collector.filesystem.ignored-mount-points
        - '^/(sys|proc|dev|host|etc)($|/)'
        volumeMounts:  # mount host paths so host metrics can be collected
        - name: dev
          mountPath: /host/dev
        - name: proc
          mountPath: /host/proc
        - name: sys
          mountPath: /host/sys
        - name: rootfs
          mountPath: /rootfs
      tolerations:  # tolerate the default master taint so the pod also runs there
      - key: "node-role.kubernetes.io/master"
        operator: "Exists"
        effect: "NoSchedule"
      volumes:  # host directories backing the mounts above
      - name: proc
        hostPath:
          path: /proc
      - name: dev
        hostPath:
          path: /dev
      - name: sys
        hostPath:
          path: /sys
      - name: rootfs
        hostPath:
          path: /
[root@k8s-master prometheus]# kubectl apply -f ./node-exporter.yaml
安裝 Prometheus
建立 serviceAccount for Prometheus,使其對 Kubernetes 的各項資源擁有足夠的存取權限
# ServiceAccount used by the Prometheus server pod.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: monitor
  namespace: monitor-sa
---
# Grant the ServiceAccount cluster-wide read access for service discovery.
# NOTE(review): cluster-admin is far broader than Prometheus needs; a
# dedicated ClusterRole with get/list/watch on nodes/services/endpoints/pods
# would follow least privilege.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  # ClusterRoleBinding is cluster-scoped; a metadata.namespace field is
  # ignored by the apiserver, so it is omitted here.
  name: monitor-clusterrolebinding
subjects:
- kind: ServiceAccount
  name: monitor
  namespace: monitor-sa
roleRef:
  kind: ClusterRole
  name: cluster-admin
  # apiGroup is a required field of roleRef; without it the apiserver
  # rejects the object.
  apiGroup: rbac.authorization.k8s.io
[root@k8s-master prometheus]# kubectl apply -f ./service-account.yaml
# NFS-backed PersistentVolume for Prometheus TSDB data.
apiVersion: v1
kind: PersistentVolume
metadata:
  name: 10g-disk003
spec:
  capacity:
    storage: 10Gi
  accessModes:
  - ReadWriteMany
  persistentVolumeReclaimPolicy: Retain  # keep data when the claim is deleted
  nfs:
    path: /tmp/nfs/003
    server: 192.168.200.158
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: prometheus-pvc
  namespace: monitor-sa
spec:
  # Pin the claim to the PV above. Without volumeName the claim may bind
  # to ANY available 10Gi/RWX PV (this article creates two such PVs),
  # making the binding nondeterministic.
  volumeName: 10g-disk003
  accessModes:
  - ReadWriteMany
  resources:
    requests:
      storage: 10Gi
[root@k8s-master prometheus]# kubectl apply -f ./prometheus-pvc.yaml
透過 configMap 配置 prometheus.yml
# ConfigMap holding the main prometheus.yml, mounted into the server pod
# at /etc/prometheus.
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: monitor-sa
data:
  prometheus.yml: |
    global:
      scrape_interval: 15s      # how often targets are scraped
      scrape_timeout: 10s       # per-scrape timeout
      evaluation_interval: 30s  # how often rules are evaluated (keep >= scrape_interval
                                # to avoid duplicate alerts for one incident)
    scrape_configs:  # one job per scrape task; targets come from k8s service discovery
    # Scrape the apiserver itself over HTTPS using the in-pod service account.
    - job_name: 'kubernetes-apiservers'
      kubernetes_sd_configs:
      - role: endpoints  # discover all targets from Endpoints objects
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
      # Keep only the default/kubernetes:https endpoint — the apiserver.
      - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
        action: keep
        regex: default;kubernetes;https
    # Scrape each kubelet's /metrics, proxied through the apiserver.
    - job_name: 'kubernetes-nodes'
      kubernetes_sd_configs:
      - role: node  # discover nodes via the kubelet-backed node objects
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
      - action: labelmap  # keep labels matching this expression
        regex: __meta_kubernetes_node_label_(.+)
      - target_label: __address__
        replacement: kubernetes.default.svc:443
      - source_labels: [__meta_kubernetes_node_name]
        regex: (.+)
        target_label: __metrics_path__
        replacement: /api/v1/nodes/${1}/proxy/metrics
    # Scrape cAdvisor (container resource usage) via the kubelet's
    # /metrics/cadvisor endpoint, also proxied through the apiserver.
    - job_name: 'kubernetes-cadvisor'
      kubernetes_sd_configs:
      - role: node
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)
      - target_label: __address__
        replacement: kubernetes.default.svc:443
      - source_labels: [__meta_kubernetes_node_name]
        regex: (.+)
        target_label: __metrics_path__
        replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
    # Scrape service endpoints that opt in via prometheus.io/* annotations.
    - job_name: 'kubernetes-service-endpoints'
      kubernetes_sd_configs:
      - role: endpoints
      relabel_configs:
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
        action: keep
        regex: true
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
        action: replace
        target_label: __scheme__
        regex: (https?)
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
        action: replace
        target_label: __metrics_path__
        regex: (.+)
      - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
        action: replace
        target_label: __address__
        regex: ([^:]+)(?::\d+)?;(\d+)
        replacement: $1:$2
      - action: labelmap
        regex: __meta_kubernetes_service_label_(.+)
      - source_labels: [__meta_kubernetes_namespace]
        action: replace
        target_label: kubernetes_namespace
      - source_labels: [__meta_kubernetes_service_name]
        action: replace
        target_label: kubernetes_name
    # Black-box probe services (requires a blackbox-exporter deployment).
    - job_name: 'kubernetes-services'
      kubernetes_sd_configs:
      - role: service
      metrics_path: /probe
      params:
        module: [http_2xx]
      relabel_configs:
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
        action: keep
        regex: true
      - source_labels: [__address__]
        target_label: __param_target
      - target_label: __address__
        replacement: blackbox-exporter.example.com:9115
      - source_labels: [__param_target]
        target_label: instance
      - action: labelmap
        regex: __meta_kubernetes_service_label_(.+)
      - source_labels: [__meta_kubernetes_namespace]
        target_label: kubernetes_namespace
      - source_labels: [__meta_kubernetes_service_name]
        target_label: kubernetes_name
    # Black-box probe ingresses, same exporter as above.
    - job_name: 'kubernetes-ingresses'
      kubernetes_sd_configs:
      - role: ingress
      relabel_configs:
      - source_labels: [__meta_kubernetes_ingress_annotation_prometheus_io_probe]
        action: keep
        regex: true
      - source_labels: [__meta_kubernetes_ingress_scheme,__address__,__meta_kubernetes_ingress_path]
        regex: (.+);(.+);(.+)
        replacement: ${1}://${2}${3}
        target_label: __param_target
      - target_label: __address__
        replacement: blackbox-exporter.example.com:9115
      - source_labels: [__param_target]
        target_label: instance
      - action: labelmap
        regex: __meta_kubernetes_ingress_label_(.+)
      - source_labels: [__meta_kubernetes_namespace]
        target_label: kubernetes_namespace
      - source_labels: [__meta_kubernetes_ingress_name]
        target_label: kubernetes_name
    # Scrape pods that opt in via prometheus.io/* annotations.
    - job_name: 'kubernetes-pods'
      kubernetes_sd_configs:
      - role: pod
      relabel_configs:
      - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
        action: keep
        regex: true
      - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
        action: replace
        target_label: __metrics_path__
        regex: (.+)
      - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
        action: replace
        regex: ([^:]+)(?::\d+)?;(\d+)
        replacement: $1:$2
        target_label: __address__
      - action: labelmap
        regex: __meta_kubernetes_pod_label_(.+)
      - source_labels: [__meta_kubernetes_namespace]
        action: replace
        target_label: kubernetes_namespace
      - source_labels: [__meta_kubernetes_pod_name]
        action: replace
        target_label: kubernetes_pod_name
[root@k8s-master prometheus]# kubectl apply -f ./prometheus-cfg.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus-server
  namespace: monitor-sa
  labels:
    app: prometheus
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus
      component: server
  template:
    metadata:
      labels:
        app: prometheus
        component: server
      annotations:
        # 'false' keeps Prometheus from discovering and scraping itself via
        # the pod SD job; other pods opt in by setting this to 'true'.
        prometheus.io/scrape: 'false'
    spec:
      # nodeName: k8s-node01
      serviceAccountName: monitor  # grants API access via the ClusterRoleBinding
      containers:
      - name: prometheus
        image: prom/prometheus:v2.40.2
        imagePullPolicy: IfNotPresent
        command:
        - prometheus
        - --config.file=/etc/prometheus/prometheus.yml
        - --storage.tsdb.path=/prometheus  # TSDB data directory
        # --storage.tsdb.retention is deprecated since Prometheus 2.8;
        # retention.time is the current flag. 720h = 30 days of data.
        - --storage.tsdb.retention.time=720h
        - --web.enable-lifecycle  # enables hot reload via POST /-/reload
        ports:
        - containerPort: 9090
          protocol: TCP
        volumeMounts:
        - mountPath: /etc/prometheus  # config from the ConfigMap below
          name: prometheus-config
        - mountPath: /prometheus/
          name: prometheus-storage-volume
      volumes:
      - name: prometheus-config
        configMap:
          name: prometheus-config
      # NOTE(review): the PVC here is NFS-backed; Prometheus upstream does
      # not support non-POSIX/NFS storage for the TSDB — fine for a lab,
      # risky in production. Confirm before reuse.
      - name: prometheus-storage-volume
        persistentVolumeClaim:
          claimName: prometheus-pvc
[root@k8s-master prometheus]# kubectl apply -f ./prometheus-deploy.yaml
# NodePort Service exposing the Prometheus UI/API on every node's :30090.
apiVersion: v1
kind: Service
metadata:
  name: prometheus-service
  namespace: monitor-sa
  labels:
    app: prometheus
spec:
  type: NodePort
  ports:
  - port: 9090
    targetPort: 9090
    protocol: TCP
    nodePort: 30090  # externally exposed port; drop this (and use ClusterIP) to keep it internal
  selector:
    app: prometheus
    component: server
[root@k8s-master prometheus]# kubectl apply -f ./prometheus-svc.yaml
安裝 Grafana
配置 PVC 持久化存儲空間 for Grafana
apiVersion: v1
# NFS-backed PersistentVolume for Grafana's data directory.
kind: PersistentVolume
metadata:
  name: 10g-disk004
spec:
  capacity:
    storage: 10Gi
  accessModes:
  - ReadWriteMany
  persistentVolumeReclaimPolicy: Retain  # keep data when the claim is deleted
  nfs:
    path: /tmp/nfs/004
    server: 192.168.200.158
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: grafana-pvc
  namespace: monitor-sa
spec:
  # Pin the claim to the PV above; the article creates two identical
  # 10Gi/RWX PVs, so an unpinned claim could bind to either one.
  volumeName: 10g-disk004
  accessModes:
  - ReadWriteMany
  resources:
    requests:
      storage: 10Gi
[root@k8s-master prometheus]# kubectl apply -f ./grafana-pvc.yaml
由於 Grafana 只是 UI 展示端,並不容易與 Kubernetes 發生版本衝突的問題,因此我們直接採用最新版本的鏡像安裝部署 Grafana
apiVersion: apps/v1
kind: Deployment
metadata:
  name: grafana-server
  namespace: monitor-sa
spec:
  replicas: 1
  selector:
    matchLabels:
      task: monitoring
      app: grafana
  template:
    metadata:
      labels:
        task: monitoring
        app: grafana
    spec:
      containers:
      - name: grafana
        image: grafana/grafana:12.0.4
        imagePullPolicy: IfNotPresent
        ports:
        - containerPort: 3000
          protocol: TCP
        volumeMounts:
        - mountPath: /etc/ssl/certs  # host CA bundle for outbound TLS
          name: ca-certificates
          readOnly: true
        # NOTE(review): mounting an emptyDir over /var shadows the image's
        # /var content; this layout comes from the old heapster addon
        # manifest — confirm it is intended before reuse.
        - mountPath: /var
          name: grafana-storage
        - mountPath: /var/lib/grafana/  # persisted dashboards/DB (PVC below)
          name: lib
        env:
        # NOTE(review): INFLUXDB_HOST is a leftover from the heapster/influxdb
        # addon; nothing in this stack deploys influxdb.
        - name: INFLUXDB_HOST
          value: monitoring-influxdb
        - name: GF_SERVER_HTTP_PORT
          value: "3000"
        # The following env variables are required to make Grafana accessible via
        # the kubernetes api-server proxy. On production clusters, we recommend
        # removing these env variables, setup auth for grafana, and expose the grafana
        # service using a LoadBalancer or a public IP.
        - name: GF_AUTH_BASIC_ENABLED
          value: "false"
        - name: GF_AUTH_ANONYMOUS_ENABLED
          value: "true"
        - name: GF_AUTH_ANONYMOUS_ORG_ROLE
          value: Admin
        - name: GF_SERVER_ROOT_URL
          # If you're only using the API Server proxy, set this value instead:
          # value: /api/v1/namespaces/kube-system/services/monitoring-grafana/proxy
          value: /
      volumes:
      - name: ca-certificates
        hostPath:
          path: /etc/ssl/certs
      - name: grafana-storage
        emptyDir: {}
      - name: lib
        persistentVolumeClaim:
          claimName: grafana-pvc
[root@k8s-master prometheus]# kubectl apply -f ./grafana-deploy.yaml
# NodePort Service exposing the Grafana UI on every node's :30091.
apiVersion: v1
kind: Service
metadata:
  labels:
    # For use as a Cluster add-on (https://github.com/kubernetes/kubernetes/tree/master/cluster/addons)
    # If you are NOT using this as an addon, you should comment out this line.
    kubernetes.io/cluster-service: 'true'
    kubernetes.io/name: monitoring-grafana
  name: grafana-service
  namespace: monitor-sa
spec:
  # In a production setup, we recommend accessing Grafana through an external Loadbalancer
  # or through a public IP.
  # type: LoadBalancer
  # You could also use NodePort to expose the service at a randomly-generated port
  # type: NodePort
  type: NodePort
  ports:
  - port: 3000
    targetPort: 3000
    nodePort: 30091  # externally exposed port
  selector:
    app: grafana
[root@k8s-master prometheus]# kubectl apply -f ./grafana-svc.yaml
透過 Prometheus 觀測 Kubernetes 的監控資料
瀏覽器輸入 http://192.168.200.161:30091 登入 Grafana 頁面,添加 Prometheus 的 data source;Prometheus server URL 位址填入我們上面建立的 prometheus service name + 命名空間 + 元件名 + 端口號。接著前往 Grafana dashboards 找一個你喜歡的 Dashboard 模板,本例我們盲抽到 Kubernetes cluster monitoring (via Prometheus),點選右下角 Download JSON 下載 json 模板
前往 Dashboards > New > Import 上傳 json 檔案以匯入此模板,下方選擇我們剛剛建立的 Prometheus data source
本文內容參閱以下連結:
Kubernetes 集群和應用監控方案的設計與實踐
Prometheus 服務發現之 kubernetes_sd_config
推薦閱讀:
0 Comments:
張貼留言