diff --git a/meta/domain-monitor/apps.tf b/meta/domain-monitor/apps.tf index b731492..a92dcb6 100644 --- a/meta/domain-monitor/apps.tf +++ b/meta/domain-monitor/apps.tf @@ -8,7 +8,7 @@ locals { } global = { "domain" = var.namespace - "domain-name" = "admin.${var.domain-name}" + "domain-name" = "monitor.${var.domain-name}" "issuer" = var.issuer "ingress-class" = var.ingress-class "backups" = var.backups @@ -16,6 +16,8 @@ locals { } prometheus = { for k, v in var.prometheus : k => v if k!="enable" } alertmanager = { for k, v in var.alertmanager : k => v if k!="enable" } + alerts-core = { for k, v in var.alerts-core : k => v if k!="enable" } + alerts-containers = { for k, v in var.alerts-containers : k => v if k!="enable" } nodeExporter = { for k, v in var.node-exporter : k => v if k!="enable" } kubeStateMetrics = merge({"cluster-admin" = true}, { for k, v in var.kube-state-metrics : k => v if k!="enable" }) monitorControlPlan = merge({"cluster-admin" = true}, { for k, v in var.monitor-control-plan : k => v if k!="enable" }) @@ -117,3 +119,38 @@ resource "kubectl_manifest" "monitorControlPlan" { options: ${jsonencode(merge(local.global, local.monitorControlPlan))} EOF } + +resource "kubectl_manifest" "alerts-core" { + count = var.alerts-core.enable ? 1 : 0 + depends_on = [kubernetes_namespace_v1.monitor-ns] + yaml_body = <<-EOF + apiVersion: "vynil.solidite.fr/v1" + kind: "Install" + metadata: + name: "alerts-core" + namespace: "${kubernetes_namespace_v1.monitor-ns[0].metadata[0].name}" + labels: ${jsonencode(local.common-labels)} + spec: + distrib: "${var.distributions.domain}" + category: "monitor" + component: "alerts-core" + options: ${jsonencode(merge(local.global, local.alerts-core))} + EOF +} +resource "kubectl_manifest" "alerts-containers" { + count = var.alerts-containers.enable ? 1 : 0 + depends_on = [kubernetes_namespace_v1.monitor-ns] + yaml_body = <<-EOF + apiVersion: "vynil.solidite.fr/v1" + kind: "Install" + metadata: + name: "alerts-containers" + namespace: "${kubernetes_namespace_v1.monitor-ns[0].metadata[0].name}" + labels: ${jsonencode(local.common-labels)} + spec: + distrib: "${var.distributions.domain}" + category: "monitor" + component: "alerts-containers" + options: ${jsonencode(merge(local.global, local.alerts-containers))} + EOF +} diff --git a/meta/domain-monitor/index.yaml b/meta/domain-monitor/index.yaml index fda45d5..f7b9c6b 100644 --- a/meta/domain-monitor/index.yaml +++ b/meta/domain-monitor/index.yaml @@ -6,11 +6,91 @@ metadata: name: domain-monitor description: null options: + domain-name: + default: your_company.com + examples: + - your_company.com + type: string issuer: default: letsencrypt-prod examples: - letsencrypt-prod type: string + ingress-class: + default: traefik + examples: + - traefik + type: string + app-group: + default: monitor + examples: + - monitor + type: string + alertmanager: + default: + enable: true + examples: + - enable: true + properties: + enable: + default: true + type: boolean + type: object + x-vynil-category: monitor + x-vynil-package: alertmanager + alerts-core: + default: + enable: true + examples: + - enable: true + properties: + enable: + default: true + type: boolean + type: object + x-vynil-category: monitor + x-vynil-package: alerts-core + domain: + default: your-company + examples: + - your-company + type: string + prometheus: + default: + enable: true + examples: + - enable: true + properties: + enable: + default: true + type: boolean + type: object + x-vynil-category: monitor + x-vynil-package: prometheus + monitor-control-plan: + default: + enable: false + examples: + - enable: false + properties: + enable: + default: false + type: boolean + type: object + x-vynil-category: monitor + x-vynil-package: monitor-control-plan + kube-state-metrics: + default: + enable: true + examples: + - enable: true + properties: + enable: + default: true + type: boolean + type: object + x-vynil-category: monitor + x-vynil-package: kube-state-metrics storage-classes: default: BlockReadWriteMany: '' @@ -36,12 +116,7 @@ options: default: '' type: string type: object - ingress-class: - default: traefik - examples: - - traefik - type: string - alertmanager: + alerts-containers: default: enable: true examples: @@ -52,7 +127,19 @@ options: type: boolean type: object x-vynil-category: monitor - x-vynil-package: alertmanager + x-vynil-package: alerts-containers + node-exporter: + default: + enable: true + examples: + - enable: true + properties: + enable: + default: true + type: boolean + type: object + x-vynil-category: monitor + x-vynil-package: node-exporter backups: default: enable: false @@ -83,59 +170,6 @@ options: default: backup-settings type: string type: object - app-group: - default: infra - examples: - - infra - type: string - kube-state-metrics: - default: - enable: true - examples: - - enable: true - properties: - enable: - default: true - type: boolean - type: object - x-vynil-category: monitor - x-vynil-package: kube-state-metrics - monitor-control-plan: - default: - enable: false - examples: - - enable: false - properties: - enable: - default: false - type: boolean - type: object - x-vynil-category: monitor - x-vynil-package: monitor-control-plan - prometheus: - default: - enable: true - examples: - - enable: true - properties: - enable: - default: true - type: boolean - type: object - x-vynil-category: monitor - x-vynil-package: prometheus - node-exporter: - default: - enable: true - examples: - - enable: true - properties: - enable: - default: true - type: boolean - type: object - x-vynil-category: monitor - x-vynil-package: node-exporter distributions: default: core: core @@ -151,16 +185,6 @@ options: default: domain type: string type: object - domain: - default: your-company - examples: - - your-company - type: string - domain-name: - default: your_company.com - examples: - - your_company.com - type: string dependencies: [] providers: kubernetes: true diff --git a/monitor/alertmanager/index.yaml b/monitor/alertmanager/index.yaml index 694c0f6..c9414a2 100644 --- a/monitor/alertmanager/index.yaml +++ b/monitor/alertmanager/index.yaml @@ -6,16 +6,41 @@ metadata: name: alertmanager description: null options: - sub-domain: - default: to-be-set + replicas: + default: 1 examples: - - to-be-set + - 1 + type: integer + listenLocal: + default: false + examples: + - false + type: boolean + logLevel: + default: info + examples: + - info + type: string + sub-domain: + default: alertmanager + examples: + - alertmanager type: string ingress-class: default: traefik examples: - traefik type: string + retention: + default: 120h + examples: + - 120h + type: string + app-group: + default: monitor + examples: + - monitor + type: string issuer: default: letsencrypt-prod examples: @@ -60,41 +85,16 @@ options: type: string type: object type: object - replicas: - default: 1 - examples: - - 1 - type: integer - retention: - default: 120h - examples: - - 120h - type: string domain-name: default: your_company.com examples: - your_company.com type: string - app-group: - default: monitor - examples: - - monitor - type: string - logLevel: - default: info - examples: - - info - type: string domain: default: your-company examples: - your-company type: string - listenLocal: - default: false - examples: - - false - type: boolean dependencies: - dist: null category: share diff --git a/monitor/alerts-containers/datas.tf b/monitor/alerts-containers/datas.tf new file mode 100644 index 0000000..9797980 --- /dev/null +++ b/monitor/alerts-containers/datas.tf @@ -0,0 +1,16 @@ +locals { + common-labels = { + "vynil.solidite.fr/owner-name" = var.instance + "vynil.solidite.fr/owner-namespace" = var.namespace + "vynil.solidite.fr/owner-category" = var.category + "vynil.solidite.fr/owner-component" = var.component + "app.kubernetes.io/managed-by" = "vynil" + "app.kubernetes.io/instance" = var.instance + } +} + +data "kustomization_overlay" "data" { + common_labels = local.common-labels + namespace = var.namespace + resources = [for file in fileset(path.module, "*.yaml"): file if file != "index.yaml"] +} diff --git a/monitor/alerts-containers/index.yaml b/monitor/alerts-containers/index.yaml new file mode 100644 index 0000000..3418ddb --- /dev/null +++ b/monitor/alerts-containers/index.yaml @@ -0,0 +1,18 @@ +--- +apiVersion: vinyl.solidite.fr/v1beta1 +kind: Component +category: monitor +metadata: + name: alerts-containers + description: null +options: {} +dependencies: [] +providers: + kubernetes: true + authentik: null + kubectl: true + postgresql: null + restapi: null + http: null + gitea: null +tfaddtype: null diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-cpu-usage-seconds.yaml b/monitor/alerts-containers/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-cpu-usage-seconds.yaml similarity index 99% rename from monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-cpu-usage-seconds.yaml rename to monitor/alerts-containers/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-cpu-usage-seconds.yaml index 2b966a4..0018f4d 100644 --- a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-cpu-usage-seconds.yaml +++ b/monitor/alerts-containers/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-cpu-usage-seconds.yaml @@ -1,3 +1,4 @@ +--- # Source: kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_cpu_usage_seconds_total.yaml apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-cache.yaml b/monitor/alerts-containers/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-cache.yaml similarity index 100% rename from monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-cache.yaml rename to monitor/alerts-containers/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-cache.yaml diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-rss.yaml b/monitor/alerts-containers/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-rss.yaml similarity index 100% rename from monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-rss.yaml rename to monitor/alerts-containers/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-rss.yaml diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-swap.yaml b/monitor/alerts-containers/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-swap.yaml similarity index 100% rename from monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-swap.yaml rename to monitor/alerts-containers/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-swap.yaml diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-working-se.yaml b/monitor/alerts-containers/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-working-se.yaml similarity index 100% rename from monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-working-se.yaml rename to monitor/alerts-containers/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-working-se.yaml diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-resource.yaml b/monitor/alerts-containers/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-resource.yaml similarity index 100% rename from monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-resource.yaml rename to monitor/alerts-containers/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-resource.yaml diff --git a/monitor/alerts-core/datas.tf b/monitor/alerts-core/datas.tf new file mode 100644 index 0000000..9797980 --- /dev/null +++ b/monitor/alerts-core/datas.tf @@ -0,0 +1,16 @@ +locals { + common-labels = { + "vynil.solidite.fr/owner-name" = var.instance + "vynil.solidite.fr/owner-namespace" = var.namespace + "vynil.solidite.fr/owner-category" = var.category + "vynil.solidite.fr/owner-component" = var.component + "app.kubernetes.io/managed-by" = "vynil" + "app.kubernetes.io/instance" = var.instance + } +} + +data "kustomization_overlay" "data" { + common_labels = local.common-labels + namespace = var.namespace + resources = [for file in fileset(path.module, "*.yaml"): file if file != "index.yaml"] +} diff --git a/monitor/alerts-core/index.yaml b/monitor/alerts-core/index.yaml new file mode 100644 index 0000000..c621292 --- /dev/null +++ b/monitor/alerts-core/index.yaml @@ -0,0 +1,18 @@ +--- +apiVersion: vinyl.solidite.fr/v1beta1 +kind: Component +category: monitor +metadata: + name: alerts-core + description: null +options: {} +dependencies: [] +providers: + kubernetes: true + authentik: null + kubectl: true + postgresql: null + restapi: null + http: null + gitea: null +tfaddtype: null diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-config-reloaders.yaml b/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-config-reloaders.yaml similarity index 99% rename from monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-config-reloaders.yaml rename to monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-config-reloaders.yaml index 2e2bc25..ad24817 100644 --- a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-config-reloaders.yaml +++ b/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-config-reloaders.yaml @@ -1,3 +1,4 @@ +--- # Source: kube-prometheus-stack/templates/prometheus/rules-1.14/config-reloaders.yaml apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-general.rules.yaml b/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-general.rules.yaml similarity index 100% rename from monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-general.rules.yaml rename to monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-general.rules.yaml diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.pod-owner.yaml b/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.pod-owner.yaml similarity index 100% rename from monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.pod-owner.yaml rename to monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.pod-owner.yaml diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-prometheus-general.rules.yaml b/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-prometheus-general.rules.yaml similarity index 100% rename from monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-prometheus-general.rules.yaml rename to monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-prometheus-general.rules.yaml diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-prometheus-node-recording.rules.yaml b/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-prometheus-node-recording.rules.yaml similarity index 100% rename from monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-prometheus-node-recording.rules.yaml rename to monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-prometheus-node-recording.rules.yaml diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-apps.yaml b/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-apps.yaml similarity index 100% rename from monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-apps.yaml rename to monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-apps.yaml diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-resources.yaml b/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-resources.yaml similarity index 100% rename from monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-resources.yaml rename to monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-resources.yaml diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-storage.yaml b/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-storage.yaml similarity index 100% rename from monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-storage.yaml rename to monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-storage.yaml diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-apiserver.yaml b/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-apiserver.yaml similarity index 100% rename from monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-apiserver.yaml rename to monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-apiserver.yaml diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-kubelet.yaml b/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-kubelet.yaml similarity index 100% rename from monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-kubelet.yaml rename to monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-kubelet.yaml diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system.yaml b/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system.yaml similarity index 100% rename from monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system.yaml rename to monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system.yaml diff --git a/monitor/monitor-control-plan/index.yaml b/monitor/monitor-control-plan/index.yaml index 572f03c..a6f0e86 100644 --- a/monitor/monitor-control-plan/index.yaml +++ b/monitor/monitor-control-plan/index.yaml @@ -5,75 +5,11 @@ category: monitor metadata: name: monitor-control-plan description: null -options: - sub-domain: - default: to-be-set - examples: - - to-be-set - type: string - issuer: - default: letsencrypt-prod - examples: - - letsencrypt-prod - type: string - domain: - default: your-company - examples: - - your-company - type: string - ingress-class: - default: traefik - examples: - - traefik - type: string - images: - default: - operator: - pullPolicy: IfNotPresent - registry: docker.io - repository: to-be/defined - tag: v1.0.0 - examples: - - operator: - pullPolicy: IfNotPresent - registry: docker.io - repository: to-be/defined - tag: v1.0.0 - properties: - operator: - default: - pullPolicy: IfNotPresent - registry: docker.io - repository: to-be/defined - tag: v1.0.0 - properties: - pullPolicy: - default: IfNotPresent - enum: - - Always - - Never - - IfNotPresent - type: string - registry: - default: docker.io - type: string - repository: - default: to-be/defined - type: string - tag: - default: v1.0.0 - type: string - type: object - type: object - domain-name: - default: your_company.com - examples: - - your_company.com - type: string +options: {} dependencies: [] providers: kubernetes: true - authentik: true + authentik: null kubectl: true postgresql: null restapi: null diff --git a/monitor/prometheus/index.yaml b/monitor/prometheus/index.yaml index 5eb1b23..6cad951 100644 --- a/monitor/prometheus/index.yaml +++ b/monitor/prometheus/index.yaml @@ -6,41 +6,51 @@ metadata: name: prometheus description: null options: - shards: - default: 1 + issuer: + default: letsencrypt-prod examples: - - 1 - type: integer - retention: - default: 10d + - letsencrypt-prod + type: string + app-group: + default: monitor examples: - - 10d + - monitor type: string logLevel: default: info examples: - info type: string - domain-name: - default: your_company.com + enableAdminAPI: + default: false examples: - - your_company.com - type: string - domain: - default: your-company + - false + type: boolean + replicas: + default: 1 examples: - - your-company - type: string + - 1 + type: integer listenLocal: default: false examples: - false type: boolean + domain-name: + default: your_company.com + examples: + - your_company.com + type: string ingress-class: default: traefik examples: - traefik type: string + shards: + default: 1 + examples: + - 1 + type: integer images: default: prometheus: @@ -80,31 +90,21 @@ options: type: string type: object type: object - app-group: - default: monitor + domain: + default: your-company examples: - - monitor + - your-company type: string - issuer: - default: letsencrypt-prod + retention: + default: 10d examples: - - letsencrypt-prod + - 10d type: string sub-domain: default: prometheus examples: - prometheus type: string - replicas: - default: 1 - examples: - - 1 - type: integer - enableAdminAPI: - default: false - examples: - - false - type: boolean dependencies: - dist: null category: share diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-state-metrics.yaml b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-state-metrics.yaml deleted file mode 100644 index 11b23d4..0000000 --- a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-state-metrics.yaml +++ /dev/null @@ -1,68 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kube-state-metrics.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: prometheus-community-kube-kube-state-metrics - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.0.2" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.0.2 - release: "prometheus-community" - heritage: "Helm" -spec: - groups: - - name: kube-state-metrics - rules: - - alert: KubeStateMetricsListErrors - annotations: - description: kube-state-metrics is experiencing errors at an elevated rate in list operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricslisterrors - summary: kube-state-metrics is experiencing errors in list operations. - expr: |- - (sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m])) by (cluster) - / - sum(rate(kube_state_metrics_list_total{job="kube-state-metrics"}[5m])) by (cluster)) - > 0.01 - for: 15m - labels: - severity: critical - - alert: KubeStateMetricsWatchErrors - annotations: - description: kube-state-metrics is experiencing errors at an elevated rate in watch operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricswatcherrors - summary: kube-state-metrics is experiencing errors in watch operations. - expr: |- - (sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics",result="error"}[5m])) by (cluster) - / - sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics"}[5m])) by (cluster)) - > 0.01 - for: 15m - labels: - severity: critical - - alert: KubeStateMetricsShardingMismatch - annotations: - description: kube-state-metrics pods are running with different --total-shards configuration, some Kubernetes objects may be exposed multiple times or not exposed at all. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricsshardingmismatch - summary: kube-state-metrics sharding is misconfigured. - expr: stdvar (kube_state_metrics_total_shards{job="kube-state-metrics"}) by (cluster) != 0 - for: 15m - labels: - severity: critical - - alert: KubeStateMetricsShardsMissing - annotations: - description: kube-state-metrics shards are missing, some Kubernetes objects are not being exposed. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricsshardsmissing - summary: kube-state-metrics shards are missing. - expr: |- - 2^max(kube_state_metrics_total_shards{job="kube-state-metrics"}) by (cluster) - 1 - - - sum( 2 ^ max by (cluster, shard_ordinal) (kube_state_metrics_shard_ordinal{job="kube-state-metrics"}) ) by (cluster) - != 0 - for: 15m - labels: - severity: critical \ No newline at end of file diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubelet.rules.yaml b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubelet.rules.yaml deleted file mode 100644 index e8bdb8c..0000000 --- a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubelet.rules.yaml +++ /dev/null @@ -1,32 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubelet.rules.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: prometheus-community-kube-kubelet.rules - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.0.2" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.0.2 - release: "prometheus-community" - heritage: "Helm" -spec: - groups: - - name: kubelet.rules - rules: - - expr: histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (cluster, instance, le) * on (cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) - labels: - quantile: '0.99' - record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile - - expr: histogram_quantile(0.9, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (cluster, instance, le) * on (cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) - labels: - quantile: '0.9' - record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile - - expr: histogram_quantile(0.5, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (cluster, instance, le) * on (cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) - labels: - quantile: '0.5' - record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile \ No newline at end of file diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-kube-proxy.yaml b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-kube-proxy.yaml deleted file mode 100644 index 60cf44d..0000000 --- a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-kube-proxy.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-kube-proxy.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: prometheus-community-kube-kubernetes-system-kube-proxy - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.0.2" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.0.2 - release: "prometheus-community" - heritage: "Helm" -spec: - groups: - - name: kubernetes-system-kube-proxy - rules: - - alert: KubeProxyDown - annotations: - description: KubeProxy has disappeared from Prometheus target discovery. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeproxydown - summary: Target disappeared from Prometheus target discovery. - expr: absent(up{job="kube-proxy"} == 1) - for: 15m - labels: - severity: critical \ No newline at end of file