fix
This commit is contained in:
@@ -8,7 +8,7 @@ locals {
|
||||
}
|
||||
global = {
|
||||
"domain" = var.namespace
|
||||
"domain-name" = "admin.${var.domain-name}"
|
||||
"domain-name" = "monitor.${var.domain-name}"
|
||||
"issuer" = var.issuer
|
||||
"ingress-class" = var.ingress-class
|
||||
"backups" = var.backups
|
||||
@@ -16,6 +16,8 @@ locals {
|
||||
}
|
||||
prometheus = { for k, v in var.prometheus : k => v if k!="enable" }
|
||||
alertmanager = { for k, v in var.alertmanager : k => v if k!="enable" }
|
||||
alerts-core = { for k, v in var.alerts-core : k => v if k!="enable" }
|
||||
alerts-containers = { for k, v in var.alerts-containers : k => v if k!="enable" }
|
||||
nodeExporter = { for k, v in var.node-exporter : k => v if k!="enable" }
|
||||
kubeStateMetrics = merge({"cluster-admin" = true}, { for k, v in var.kube-state-metrics : k => v if k!="enable" })
|
||||
monitorControlPlan = merge({"cluster-admin" = true}, { for k, v in var.monitor-control-plan : k => v if k!="enable" })
|
||||
@@ -117,3 +119,38 @@ resource "kubectl_manifest" "monitorControlPlan" {
|
||||
options: ${jsonencode(merge(local.global, local.monitorControlPlan))}
|
||||
EOF
|
||||
}
|
||||
|
||||
resource "kubectl_manifest" "alerts-core" {
|
||||
count = var.alerts-core.enable ? 1 : 0
|
||||
depends_on = [kubernetes_namespace_v1.monitor-ns]
|
||||
yaml_body = <<-EOF
|
||||
apiVersion: "vynil.solidite.fr/v1"
|
||||
kind: "Install"
|
||||
metadata:
|
||||
name: "alerts-core"
|
||||
namespace: "${kubernetes_namespace_v1.monitor-ns[0].metadata[0].name}"
|
||||
labels: ${jsonencode(local.common-labels)}
|
||||
spec:
|
||||
distrib: "${var.distributions.domain}"
|
||||
category: "monitor"
|
||||
component: "alerts-core"
|
||||
options: ${jsonencode(merge(local.global, local.alerts-core))}
|
||||
EOF
|
||||
}
|
||||
resource "kubectl_manifest" "alerts-containers" {
|
||||
count = var.alerts-containers.enable ? 1 : 0
|
||||
depends_on = [kubernetes_namespace_v1.monitor-ns]
|
||||
yaml_body = <<-EOF
|
||||
apiVersion: "vynil.solidite.fr/v1"
|
||||
kind: "Install"
|
||||
metadata:
|
||||
name: "alerts-containers"
|
||||
namespace: "${kubernetes_namespace_v1.monitor-ns[0].metadata[0].name}"
|
||||
labels: ${jsonencode(local.common-labels)}
|
||||
spec:
|
||||
distrib: "${var.distributions.domain}"
|
||||
category: "monitor"
|
||||
component: "alerts-containers"
|
||||
options: ${jsonencode(merge(local.global, local.alerts-containers))}
|
||||
EOF
|
||||
}
|
||||
|
||||
@@ -6,11 +6,91 @@ metadata:
|
||||
name: domain-monitor
|
||||
description: null
|
||||
options:
|
||||
domain-name:
|
||||
default: your_company.com
|
||||
examples:
|
||||
- your_company.com
|
||||
type: string
|
||||
issuer:
|
||||
default: letsencrypt-prod
|
||||
examples:
|
||||
- letsencrypt-prod
|
||||
type: string
|
||||
ingress-class:
|
||||
default: traefik
|
||||
examples:
|
||||
- traefik
|
||||
type: string
|
||||
app-group:
|
||||
default: monitor
|
||||
examples:
|
||||
- monitor
|
||||
type: string
|
||||
alertmanager:
|
||||
default:
|
||||
enable: true
|
||||
examples:
|
||||
- enable: true
|
||||
properties:
|
||||
enable:
|
||||
default: true
|
||||
type: boolean
|
||||
type: object
|
||||
x-vynil-category: monitor
|
||||
x-vynil-package: alertmanager
|
||||
alerts-core:
|
||||
default:
|
||||
enable: true
|
||||
examples:
|
||||
- enable: true
|
||||
properties:
|
||||
enable:
|
||||
default: true
|
||||
type: boolean
|
||||
type: object
|
||||
x-vynil-category: monitor
|
||||
x-vynil-package: alerts-core
|
||||
domain:
|
||||
default: your-company
|
||||
examples:
|
||||
- your-company
|
||||
type: string
|
||||
prometheus:
|
||||
default:
|
||||
enable: true
|
||||
examples:
|
||||
- enable: true
|
||||
properties:
|
||||
enable:
|
||||
default: true
|
||||
type: boolean
|
||||
type: object
|
||||
x-vynil-category: monitor
|
||||
x-vynil-package: prometheus
|
||||
monitor-control-plan:
|
||||
default:
|
||||
enable: false
|
||||
examples:
|
||||
- enable: false
|
||||
properties:
|
||||
enable:
|
||||
default: false
|
||||
type: boolean
|
||||
type: object
|
||||
x-vynil-category: monitor
|
||||
x-vynil-package: monitor-control-plan
|
||||
kube-state-metrics:
|
||||
default:
|
||||
enable: true
|
||||
examples:
|
||||
- enable: true
|
||||
properties:
|
||||
enable:
|
||||
default: true
|
||||
type: boolean
|
||||
type: object
|
||||
x-vynil-category: monitor
|
||||
x-vynil-package: kube-state-metrics
|
||||
storage-classes:
|
||||
default:
|
||||
BlockReadWriteMany: ''
|
||||
@@ -36,12 +116,7 @@ options:
|
||||
default: ''
|
||||
type: string
|
||||
type: object
|
||||
ingress-class:
|
||||
default: traefik
|
||||
examples:
|
||||
- traefik
|
||||
type: string
|
||||
alertmanager:
|
||||
alerts-containers:
|
||||
default:
|
||||
enable: true
|
||||
examples:
|
||||
@@ -52,7 +127,19 @@ options:
|
||||
type: boolean
|
||||
type: object
|
||||
x-vynil-category: monitor
|
||||
x-vynil-package: alertmanager
|
||||
x-vynil-package: alerts-containers
|
||||
node-exporter:
|
||||
default:
|
||||
enable: true
|
||||
examples:
|
||||
- enable: true
|
||||
properties:
|
||||
enable:
|
||||
default: true
|
||||
type: boolean
|
||||
type: object
|
||||
x-vynil-category: monitor
|
||||
x-vynil-package: node-exporter
|
||||
backups:
|
||||
default:
|
||||
enable: false
|
||||
@@ -83,59 +170,6 @@ options:
|
||||
default: backup-settings
|
||||
type: string
|
||||
type: object
|
||||
app-group:
|
||||
default: infra
|
||||
examples:
|
||||
- infra
|
||||
type: string
|
||||
kube-state-metrics:
|
||||
default:
|
||||
enable: true
|
||||
examples:
|
||||
- enable: true
|
||||
properties:
|
||||
enable:
|
||||
default: true
|
||||
type: boolean
|
||||
type: object
|
||||
x-vynil-category: monitor
|
||||
x-vynil-package: kube-state-metrics
|
||||
monitor-control-plan:
|
||||
default:
|
||||
enable: false
|
||||
examples:
|
||||
- enable: false
|
||||
properties:
|
||||
enable:
|
||||
default: false
|
||||
type: boolean
|
||||
type: object
|
||||
x-vynil-category: monitor
|
||||
x-vynil-package: monitor-control-plan
|
||||
prometheus:
|
||||
default:
|
||||
enable: true
|
||||
examples:
|
||||
- enable: true
|
||||
properties:
|
||||
enable:
|
||||
default: true
|
||||
type: boolean
|
||||
type: object
|
||||
x-vynil-category: monitor
|
||||
x-vynil-package: prometheus
|
||||
node-exporter:
|
||||
default:
|
||||
enable: true
|
||||
examples:
|
||||
- enable: true
|
||||
properties:
|
||||
enable:
|
||||
default: true
|
||||
type: boolean
|
||||
type: object
|
||||
x-vynil-category: monitor
|
||||
x-vynil-package: node-exporter
|
||||
distributions:
|
||||
default:
|
||||
core: core
|
||||
@@ -151,16 +185,6 @@ options:
|
||||
default: domain
|
||||
type: string
|
||||
type: object
|
||||
domain:
|
||||
default: your-company
|
||||
examples:
|
||||
- your-company
|
||||
type: string
|
||||
domain-name:
|
||||
default: your_company.com
|
||||
examples:
|
||||
- your_company.com
|
||||
type: string
|
||||
dependencies: []
|
||||
providers:
|
||||
kubernetes: true
|
||||
|
||||
@@ -6,16 +6,41 @@ metadata:
|
||||
name: alertmanager
|
||||
description: null
|
||||
options:
|
||||
sub-domain:
|
||||
default: to-be-set
|
||||
replicas:
|
||||
default: 1
|
||||
examples:
|
||||
- to-be-set
|
||||
- 1
|
||||
type: integer
|
||||
listenLocal:
|
||||
default: false
|
||||
examples:
|
||||
- false
|
||||
type: boolean
|
||||
logLevel:
|
||||
default: info
|
||||
examples:
|
||||
- info
|
||||
type: string
|
||||
sub-domain:
|
||||
default: alertmanager
|
||||
examples:
|
||||
- alertmanager
|
||||
type: string
|
||||
ingress-class:
|
||||
default: traefik
|
||||
examples:
|
||||
- traefik
|
||||
type: string
|
||||
retention:
|
||||
default: 120h
|
||||
examples:
|
||||
- 120h
|
||||
type: string
|
||||
app-group:
|
||||
default: monitor
|
||||
examples:
|
||||
- monitor
|
||||
type: string
|
||||
issuer:
|
||||
default: letsencrypt-prod
|
||||
examples:
|
||||
@@ -60,41 +85,16 @@ options:
|
||||
type: string
|
||||
type: object
|
||||
type: object
|
||||
replicas:
|
||||
default: 1
|
||||
examples:
|
||||
- 1
|
||||
type: integer
|
||||
retention:
|
||||
default: 120h
|
||||
examples:
|
||||
- 120h
|
||||
type: string
|
||||
domain-name:
|
||||
default: your_company.com
|
||||
examples:
|
||||
- your_company.com
|
||||
type: string
|
||||
app-group:
|
||||
default: monitor
|
||||
examples:
|
||||
- monitor
|
||||
type: string
|
||||
logLevel:
|
||||
default: info
|
||||
examples:
|
||||
- info
|
||||
type: string
|
||||
domain:
|
||||
default: your-company
|
||||
examples:
|
||||
- your-company
|
||||
type: string
|
||||
listenLocal:
|
||||
default: false
|
||||
examples:
|
||||
- false
|
||||
type: boolean
|
||||
dependencies:
|
||||
- dist: null
|
||||
category: share
|
||||
|
||||
16
monitor/alerts-containers/datas.tf
Normal file
16
monitor/alerts-containers/datas.tf
Normal file
@@ -0,0 +1,16 @@
|
||||
locals {
|
||||
common-labels = {
|
||||
"vynil.solidite.fr/owner-name" = var.instance
|
||||
"vynil.solidite.fr/owner-namespace" = var.namespace
|
||||
"vynil.solidite.fr/owner-category" = var.category
|
||||
"vynil.solidite.fr/owner-component" = var.component
|
||||
"app.kubernetes.io/managed-by" = "vynil"
|
||||
"app.kubernetes.io/instance" = var.instance
|
||||
}
|
||||
}
|
||||
|
||||
data "kustomization_overlay" "data" {
|
||||
common_labels = local.common-labels
|
||||
namespace = var.namespace
|
||||
resources = [for file in fileset(path.module, "*.yaml"): file if file != "index.yaml"]
|
||||
}
|
||||
18
monitor/alerts-containers/index.yaml
Normal file
18
monitor/alerts-containers/index.yaml
Normal file
@@ -0,0 +1,18 @@
|
||||
---
|
||||
apiVersion: vinyl.solidite.fr/v1beta1
|
||||
kind: Component
|
||||
category: monitor
|
||||
metadata:
|
||||
name: alerts-containers
|
||||
description: null
|
||||
options: {}
|
||||
dependencies: []
|
||||
providers:
|
||||
kubernetes: true
|
||||
authentik: null
|
||||
kubectl: true
|
||||
postgresql: null
|
||||
restapi: null
|
||||
http: null
|
||||
gitea: null
|
||||
tfaddtype: null
|
||||
@@ -1,3 +1,4 @@
|
||||
---
|
||||
# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_cpu_usage_seconds_total.yaml
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: PrometheusRule
|
||||
16
monitor/alerts-core/datas.tf
Normal file
16
monitor/alerts-core/datas.tf
Normal file
@@ -0,0 +1,16 @@
|
||||
locals {
|
||||
common-labels = {
|
||||
"vynil.solidite.fr/owner-name" = var.instance
|
||||
"vynil.solidite.fr/owner-namespace" = var.namespace
|
||||
"vynil.solidite.fr/owner-category" = var.category
|
||||
"vynil.solidite.fr/owner-component" = var.component
|
||||
"app.kubernetes.io/managed-by" = "vynil"
|
||||
"app.kubernetes.io/instance" = var.instance
|
||||
}
|
||||
}
|
||||
|
||||
data "kustomization_overlay" "data" {
|
||||
common_labels = local.common-labels
|
||||
namespace = var.namespace
|
||||
resources = [for file in fileset(path.module, "*.yaml"): file if file != "index.yaml"]
|
||||
}
|
||||
18
monitor/alerts-core/index.yaml
Normal file
18
monitor/alerts-core/index.yaml
Normal file
@@ -0,0 +1,18 @@
|
||||
---
|
||||
apiVersion: vinyl.solidite.fr/v1beta1
|
||||
kind: Component
|
||||
category: monitor
|
||||
metadata:
|
||||
name: alerts-core
|
||||
description: null
|
||||
options: {}
|
||||
dependencies: []
|
||||
providers:
|
||||
kubernetes: true
|
||||
authentik: null
|
||||
kubectl: true
|
||||
postgresql: null
|
||||
restapi: null
|
||||
http: null
|
||||
gitea: null
|
||||
tfaddtype: null
|
||||
@@ -1,3 +1,4 @@
|
||||
---
|
||||
# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/config-reloaders.yaml
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: PrometheusRule
|
||||
@@ -5,75 +5,11 @@ category: monitor
|
||||
metadata:
|
||||
name: monitor-control-plan
|
||||
description: null
|
||||
options:
|
||||
sub-domain:
|
||||
default: to-be-set
|
||||
examples:
|
||||
- to-be-set
|
||||
type: string
|
||||
issuer:
|
||||
default: letsencrypt-prod
|
||||
examples:
|
||||
- letsencrypt-prod
|
||||
type: string
|
||||
domain:
|
||||
default: your-company
|
||||
examples:
|
||||
- your-company
|
||||
type: string
|
||||
ingress-class:
|
||||
default: traefik
|
||||
examples:
|
||||
- traefik
|
||||
type: string
|
||||
images:
|
||||
default:
|
||||
operator:
|
||||
pullPolicy: IfNotPresent
|
||||
registry: docker.io
|
||||
repository: to-be/defined
|
||||
tag: v1.0.0
|
||||
examples:
|
||||
- operator:
|
||||
pullPolicy: IfNotPresent
|
||||
registry: docker.io
|
||||
repository: to-be/defined
|
||||
tag: v1.0.0
|
||||
properties:
|
||||
operator:
|
||||
default:
|
||||
pullPolicy: IfNotPresent
|
||||
registry: docker.io
|
||||
repository: to-be/defined
|
||||
tag: v1.0.0
|
||||
properties:
|
||||
pullPolicy:
|
||||
default: IfNotPresent
|
||||
enum:
|
||||
- Always
|
||||
- Never
|
||||
- IfNotPresent
|
||||
type: string
|
||||
registry:
|
||||
default: docker.io
|
||||
type: string
|
||||
repository:
|
||||
default: to-be/defined
|
||||
type: string
|
||||
tag:
|
||||
default: v1.0.0
|
||||
type: string
|
||||
type: object
|
||||
type: object
|
||||
domain-name:
|
||||
default: your_company.com
|
||||
examples:
|
||||
- your_company.com
|
||||
type: string
|
||||
options: {}
|
||||
dependencies: []
|
||||
providers:
|
||||
kubernetes: true
|
||||
authentik: true
|
||||
authentik: null
|
||||
kubectl: true
|
||||
postgresql: null
|
||||
restapi: null
|
||||
|
||||
@@ -6,41 +6,51 @@ metadata:
|
||||
name: prometheus
|
||||
description: null
|
||||
options:
|
||||
shards:
|
||||
default: 1
|
||||
issuer:
|
||||
default: letsencrypt-prod
|
||||
examples:
|
||||
- 1
|
||||
type: integer
|
||||
retention:
|
||||
default: 10d
|
||||
- letsencrypt-prod
|
||||
type: string
|
||||
app-group:
|
||||
default: monitor
|
||||
examples:
|
||||
- 10d
|
||||
- monitor
|
||||
type: string
|
||||
logLevel:
|
||||
default: info
|
||||
examples:
|
||||
- info
|
||||
type: string
|
||||
domain-name:
|
||||
default: your_company.com
|
||||
enableAdminAPI:
|
||||
default: false
|
||||
examples:
|
||||
- your_company.com
|
||||
type: string
|
||||
domain:
|
||||
default: your-company
|
||||
- false
|
||||
type: boolean
|
||||
replicas:
|
||||
default: 1
|
||||
examples:
|
||||
- your-company
|
||||
type: string
|
||||
- 1
|
||||
type: integer
|
||||
listenLocal:
|
||||
default: false
|
||||
examples:
|
||||
- false
|
||||
type: boolean
|
||||
domain-name:
|
||||
default: your_company.com
|
||||
examples:
|
||||
- your_company.com
|
||||
type: string
|
||||
ingress-class:
|
||||
default: traefik
|
||||
examples:
|
||||
- traefik
|
||||
type: string
|
||||
shards:
|
||||
default: 1
|
||||
examples:
|
||||
- 1
|
||||
type: integer
|
||||
images:
|
||||
default:
|
||||
prometheus:
|
||||
@@ -80,31 +90,21 @@ options:
|
||||
type: string
|
||||
type: object
|
||||
type: object
|
||||
app-group:
|
||||
default: monitor
|
||||
domain:
|
||||
default: your-company
|
||||
examples:
|
||||
- monitor
|
||||
- your-company
|
||||
type: string
|
||||
issuer:
|
||||
default: letsencrypt-prod
|
||||
retention:
|
||||
default: 10d
|
||||
examples:
|
||||
- letsencrypt-prod
|
||||
- 10d
|
||||
type: string
|
||||
sub-domain:
|
||||
default: prometheus
|
||||
examples:
|
||||
- prometheus
|
||||
type: string
|
||||
replicas:
|
||||
default: 1
|
||||
examples:
|
||||
- 1
|
||||
type: integer
|
||||
enableAdminAPI:
|
||||
default: false
|
||||
examples:
|
||||
- false
|
||||
type: boolean
|
||||
dependencies:
|
||||
- dist: null
|
||||
category: share
|
||||
|
||||
@@ -1,68 +0,0 @@
|
||||
# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kube-state-metrics.yaml
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: PrometheusRule
|
||||
metadata:
|
||||
name: prometheus-community-kube-kube-state-metrics
|
||||
namespace: vynil-monitor
|
||||
labels:
|
||||
app: kube-prometheus-stack
|
||||
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
app.kubernetes.io/instance: prometheus-community
|
||||
app.kubernetes.io/version: "56.0.2"
|
||||
app.kubernetes.io/part-of: kube-prometheus-stack
|
||||
chart: kube-prometheus-stack-56.0.2
|
||||
release: "prometheus-community"
|
||||
heritage: "Helm"
|
||||
spec:
|
||||
groups:
|
||||
- name: kube-state-metrics
|
||||
rules:
|
||||
- alert: KubeStateMetricsListErrors
|
||||
annotations:
|
||||
description: kube-state-metrics is experiencing errors at an elevated rate in list operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricslisterrors
|
||||
summary: kube-state-metrics is experiencing errors in list operations.
|
||||
expr: |-
|
||||
(sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m])) by (cluster)
|
||||
/
|
||||
sum(rate(kube_state_metrics_list_total{job="kube-state-metrics"}[5m])) by (cluster))
|
||||
> 0.01
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: KubeStateMetricsWatchErrors
|
||||
annotations:
|
||||
description: kube-state-metrics is experiencing errors at an elevated rate in watch operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricswatcherrors
|
||||
summary: kube-state-metrics is experiencing errors in watch operations.
|
||||
expr: |-
|
||||
(sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics",result="error"}[5m])) by (cluster)
|
||||
/
|
||||
sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics"}[5m])) by (cluster))
|
||||
> 0.01
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: KubeStateMetricsShardingMismatch
|
||||
annotations:
|
||||
description: kube-state-metrics pods are running with different --total-shards configuration, some Kubernetes objects may be exposed multiple times or not exposed at all.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricsshardingmismatch
|
||||
summary: kube-state-metrics sharding is misconfigured.
|
||||
expr: stdvar (kube_state_metrics_total_shards{job="kube-state-metrics"}) by (cluster) != 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: KubeStateMetricsShardsMissing
|
||||
annotations:
|
||||
description: kube-state-metrics shards are missing, some Kubernetes objects are not being exposed.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricsshardsmissing
|
||||
summary: kube-state-metrics shards are missing.
|
||||
expr: |-
|
||||
2^max(kube_state_metrics_total_shards{job="kube-state-metrics"}) by (cluster) - 1
|
||||
-
|
||||
sum( 2 ^ max by (cluster, shard_ordinal) (kube_state_metrics_shard_ordinal{job="kube-state-metrics"}) ) by (cluster)
|
||||
!= 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
@@ -1,32 +0,0 @@
|
||||
# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubelet.rules.yaml
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: PrometheusRule
|
||||
metadata:
|
||||
name: prometheus-community-kube-kubelet.rules
|
||||
namespace: vynil-monitor
|
||||
labels:
|
||||
app: kube-prometheus-stack
|
||||
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
app.kubernetes.io/instance: prometheus-community
|
||||
app.kubernetes.io/version: "56.0.2"
|
||||
app.kubernetes.io/part-of: kube-prometheus-stack
|
||||
chart: kube-prometheus-stack-56.0.2
|
||||
release: "prometheus-community"
|
||||
heritage: "Helm"
|
||||
spec:
|
||||
groups:
|
||||
- name: kubelet.rules
|
||||
rules:
|
||||
- expr: histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (cluster, instance, le) * on (cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"})
|
||||
labels:
|
||||
quantile: '0.99'
|
||||
record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
|
||||
- expr: histogram_quantile(0.9, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (cluster, instance, le) * on (cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"})
|
||||
labels:
|
||||
quantile: '0.9'
|
||||
record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
|
||||
- expr: histogram_quantile(0.5, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (cluster, instance, le) * on (cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"})
|
||||
labels:
|
||||
quantile: '0.5'
|
||||
record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
|
||||
@@ -1,29 +0,0 @@
|
||||
# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-kube-proxy.yaml
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: PrometheusRule
|
||||
metadata:
|
||||
name: prometheus-community-kube-kubernetes-system-kube-proxy
|
||||
namespace: vynil-monitor
|
||||
labels:
|
||||
app: kube-prometheus-stack
|
||||
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
app.kubernetes.io/instance: prometheus-community
|
||||
app.kubernetes.io/version: "56.0.2"
|
||||
app.kubernetes.io/part-of: kube-prometheus-stack
|
||||
chart: kube-prometheus-stack-56.0.2
|
||||
release: "prometheus-community"
|
||||
heritage: "Helm"
|
||||
spec:
|
||||
groups:
|
||||
- name: kubernetes-system-kube-proxy
|
||||
rules:
|
||||
- alert: KubeProxyDown
|
||||
annotations:
|
||||
description: KubeProxy has disappeared from Prometheus target discovery.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeproxydown
|
||||
summary: Target disappeared from Prometheus target discovery.
|
||||
expr: absent(up{job="kube-proxy"} == 1)
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
Reference in New Issue
Block a user