Initial release
This commit is contained in:
@@ -0,0 +1,41 @@
|
||||
# Source: kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/mutatingWebhookConfiguration.yaml
# Registers the operator service as a mutating admission webhook for
# PrometheusRule objects (CREATE and UPDATE).
apiVersion: admissionregistration.k8s.io/v1
kind: MutatingWebhookConfiguration
metadata:
  name: prometheus-community-kube-admission
  annotations:
    # Both the legacy and the current annotation keys reference the same
    # Certificate as "<namespace>/<name>".
    certmanager.k8s.io/inject-ca-from: "vynil-monitor/prometheus-community-kube-admission"
    cert-manager.io/inject-ca-from: "vynil-monitor/prometheus-community-kube-admission"
  labels:
    app: kube-prometheus-stack-admission
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/instance: prometheus-community
    app.kubernetes.io/version: "57.0.3"
    app.kubernetes.io/part-of: kube-prometheus-stack
    chart: kube-prometheus-stack-57.0.3
    release: "prometheus-community"
    heritage: "Helm"
    app.kubernetes.io/name: kube-prometheus-stack-prometheus-operator
    app.kubernetes.io/component: prometheus-operator-webhook
webhooks:
  - name: prometheusrulemutate.monitoring.coreos.com
    # Fail closed: a request is rejected when the webhook cannot be reached.
    failurePolicy: Fail
    rules:
      - apiGroups:
          - monitoring.coreos.com
        apiVersions:
          - "*"
        resources:
          - prometheusrules
        operations:
          - CREATE
          - UPDATE
    clientConfig:
      service:
        namespace: vynil-monitor
        name: prometheus-community-kube-operator
        path: /admission-prometheusrules/mutate
    timeoutSeconds: 10
    admissionReviewVersions: ["v1", "v1beta1"]
    sideEffects: None
|
||||
@@ -0,0 +1,41 @@
|
||||
# Source: kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/validatingWebhookConfiguration.yaml
# Registers the operator service as a validating admission webhook for
# PrometheusRule objects (CREATE and UPDATE).
apiVersion: admissionregistration.k8s.io/v1
kind: ValidatingWebhookConfiguration
metadata:
  name: prometheus-community-kube-admission
  annotations:
    # Both the legacy and the current annotation keys reference the same
    # Certificate as "<namespace>/<name>".
    certmanager.k8s.io/inject-ca-from: "vynil-monitor/prometheus-community-kube-admission"
    cert-manager.io/inject-ca-from: "vynil-monitor/prometheus-community-kube-admission"
  labels:
    app: kube-prometheus-stack-admission
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/instance: prometheus-community
    app.kubernetes.io/version: "57.0.3"
    app.kubernetes.io/part-of: kube-prometheus-stack
    chart: kube-prometheus-stack-57.0.3
    release: "prometheus-community"
    heritage: "Helm"
    app.kubernetes.io/name: kube-prometheus-stack-prometheus-operator
    app.kubernetes.io/component: prometheus-operator-webhook
webhooks:
  # NOTE(review): the webhook name says "mutate" although this is the
  # validating configuration and the path below is .../validate. The name is
  # kept unchanged; confirm whether it is intentional before renaming.
  - name: prometheusrulemutate.monitoring.coreos.com
    # Fail closed: a request is rejected when the webhook cannot be reached.
    failurePolicy: Fail
    rules:
      - apiGroups:
          - monitoring.coreos.com
        apiVersions:
          - "*"
        resources:
          - prometheusrules
        operations:
          - CREATE
          - UPDATE
    clientConfig:
      service:
        namespace: vynil-monitor
        name: prometheus-community-kube-operator
        path: /admission-prometheusrules/validate
    timeoutSeconds: 10
    admissionReviewVersions: ["v1", "v1beta1"]
    sideEffects: None
|
||||
@@ -0,0 +1,91 @@
|
||||
# Source: kube-prometheus-stack/templates/prometheus-operator/deployment.yaml
# Single-replica Deployment of the Prometheus operator. The container serves
# HTTPS on port 10250 using the key pair mounted from the
# prometheus-community-kube-admission secret.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus-community-kube-operator
  namespace: vynil-monitor
  labels:
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/instance: prometheus-community
    app.kubernetes.io/version: "57.0.3"
    app.kubernetes.io/part-of: kube-prometheus-stack
    chart: kube-prometheus-stack-57.0.3
    release: "prometheus-community"
    heritage: "Helm"
    app: kube-prometheus-stack-operator
    app.kubernetes.io/name: kube-prometheus-stack-prometheus-operator
    app.kubernetes.io/component: prometheus-operator
spec:
  replicas: 1
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      app: kube-prometheus-stack-operator
      release: "prometheus-community"
  template:
    metadata:
      labels:
        app.kubernetes.io/managed-by: Helm
        app.kubernetes.io/instance: prometheus-community
        app.kubernetes.io/version: "57.0.3"
        app.kubernetes.io/part-of: kube-prometheus-stack
        chart: kube-prometheus-stack-57.0.3
        release: "prometheus-community"
        heritage: "Helm"
        app: kube-prometheus-stack-operator
        app.kubernetes.io/name: kube-prometheus-stack-prometheus-operator
        app.kubernetes.io/component: prometheus-operator
    spec:
      containers:
        - name: kube-prometheus-stack
          image: "quay.io/prometheus-operator/prometheus-operator:v0.72.0"
          imagePullPolicy: "IfNotPresent"
          args:
            - --kubelet-service=kube-system/prometheus-community-kube-kubelet
            - --localhost=127.0.0.1
            - --prometheus-config-reloader=quay.io/prometheus-operator/prometheus-config-reloader:v0.72.0
            - --config-reloader-cpu-request=0
            - --config-reloader-cpu-limit=0
            - --config-reloader-memory-request=0
            - --config-reloader-memory-limit=0
            - --thanos-default-base-image=quay.io/thanos/thanos:v0.34.1
            - --secret-field-selector=type!=kubernetes.io/dockercfg,type!=kubernetes.io/service-account-token,type!=helm.sh/release.v1
            # TLS material comes from the tls-secret volume mounted at /cert.
            - --web.enable-tls=true
            - --web.cert-file=/cert/tls.crt
            - --web.key-file=/cert/tls.key
            - --web.listen-address=:10250
            - --web.tls-min-version=VersionTLS13
          ports:
            - containerPort: 10250
              name: https
          env:
            - name: GOGC
              value: "30"
          resources: {}
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
            readOnlyRootFilesystem: true
          volumeMounts:
            - name: tls-secret
              mountPath: /cert
              readOnly: true
      volumes:
        - name: tls-secret
          secret:
            # 420 decimal == 0644 octal.
            defaultMode: 420
            secretName: prometheus-community-kube-admission
      securityContext:
        fsGroup: 65534
        runAsGroup: 65534
        runAsNonRoot: true
        runAsUser: 65534
        seccompProfile:
          type: RuntimeDefault
      serviceAccountName: prometheus-community-kube-operator
      automountServiceAccountToken: true
|
||||
@@ -0,0 +1,15 @@
|
||||
# Source: kube-prometheus-stack/templates/prometheus-operator/certmanager.yaml
# Generate a server certificate for the apiservices to use.
# It is signed by prometheus-community-kube-root-issuer and stored in the
# prometheus-community-kube-admission secret.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: prometheus-community-kube-admission
  namespace: vynil-monitor
spec:
  secretName: prometheus-community-kube-admission
  duration: "8760h0m0s"
  issuerRef:
    name: prometheus-community-kube-root-issuer
  dnsNames:
    - prometheus-community-kube-operator
    - prometheus-community-kube-operator.vynil-monitor.svc
|
||||
@@ -0,0 +1,14 @@
|
||||
# Source: kube-prometheus-stack/templates/prometheus-operator/certmanager.yaml
# Generate a CA certificate used to sign certificates for the webhook.
# It is issued by the self-signed issuer and stored in the
# prometheus-community-kube-root-cert secret.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: prometheus-community-kube-root-cert
  namespace: vynil-monitor
spec:
  secretName: prometheus-community-kube-root-cert
  duration: "43800h0m0s"
  issuerRef:
    name: prometheus-community-kube-self-signed-issuer
  commonName: "ca.webhook.kube-prometheus-stack"
  isCA: true
|
||||
@@ -0,0 +1,10 @@
|
||||
# Source: kube-prometheus-stack/templates/prometheus-operator/certmanager.yaml
# Create an Issuer that uses the generated CA certificate (secret
# prometheus-community-kube-root-cert) to issue certs.
apiVersion: cert-manager.io/v1
kind: Issuer
metadata:
  name: prometheus-community-kube-root-issuer
  namespace: vynil-monitor
spec:
  ca:
    secretName: prometheus-community-kube-root-cert
|
||||
@@ -0,0 +1,10 @@
|
||||
# Source: kube-prometheus-stack/templates/prometheus-operator/certmanager.yaml
# Create a self-signed Issuer, in order to create a root CA certificate for
# signing webhook serving certificates.
apiVersion: cert-manager.io/v1
kind: Issuer
metadata:
  name: prometheus-community-kube-self-signed-issuer
  namespace: vynil-monitor
spec:
  selfSigned: {}
|
||||
199
monitor/prometheus/datas.tf
Normal file
199
monitor/prometheus/datas.tf
Normal file
@@ -0,0 +1,199 @@
|
||||
|
||||
# Values shared by the kustomization overlays in this file.
locals {
  # Labels passed as common_labels to both overlays.
  common-labels = {
    "vynil.solidite.fr/owner-name"      = var.instance
    "vynil.solidite.fr/owner-namespace" = var.namespace
    "vynil.solidite.fr/owner-category"  = var.category
    "vynil.solidite.fr/owner-component" = var.component
    "app.kubernetes.io/managed-by"      = "vynil"
    "app.kubernetes.io/name"            = var.component
    "app.kubernetes.io/instance"        = var.instance
  }

  # JSON patch that sets the namespace of the first subject of a binding to
  # var.namespace; applied to the ClusterRoleBinding in the data_no_ns overlay.
  rb-patch = <<-EOF
    - op: replace
      path: /subjects/0/namespace
      value: "${var.namespace}"
  EOF
}
|
||||
# Overlay for the namespaced manifests: every *.yaml file of the module except
# index.yaml and the files whose name contains "ClusterRole" or
# "WebhookConfiguration" (those are picked up by the data_no_ns overlay).
# All resources are moved to var.namespace and the values that hard-code the
# original namespace or image are patched accordingly.
data "kustomization_overlay" "data" {
  common_labels = local.common-labels
  namespace     = var.namespace
  resources = [
    for file in fileset(path.module, "*.yaml") : file
    if file != "index.yaml" && length(regexall("ClusterRole", file)) < 1 && length(regexall("WebhookConfiguration", file)) < 1
  ]

  # Swap the operator image for the one selected through var.images.
  images {
    name     = "quay.io/prometheus-operator/prometheus-operator"
    new_name = "${var.images.operator.registry}/${var.images.operator.repository}"
    new_tag  = var.images.operator.tag
  }

  patches {
    target {
      kind = "Deployment"
      name = "prometheus-community-kube-operator"
    }
    patch = <<-EOF
      - op: replace
        path: /spec/template/spec/containers/0/imagePullPolicy
        value: "${var.images.operator.pullPolicy}"
    EOF
  }

  patches {
    target {
      kind = "ServiceMonitor"
      name = "prometheus-community-kube-operator"
    }
    patch = <<-EOF
      - op: replace
        path: /spec/namespaceSelector/matchNames/0
        value: "${var.namespace}"
    EOF
  }

  # The second DNS name of the serving certificate embeds the namespace.
  patches {
    target {
      kind = "Certificate"
      name = "prometheus-community-kube-admission"
    }
    patch = <<-EOF
      - op: replace
        path: /spec/dnsNames/1
        value: "prometheus-community-kube-operator.${var.namespace}.svc"
    EOF
  }

  # Every alert expression filters on the operator namespace, so the rule
  # groups are restated here with var.namespace substituted.
  patches {
    target {
      kind = "PrometheusRule"
      name = "prometheus-community-kube-prometheus-operator"
    }
    patch = <<-EOF
      apiVersion: monitoring.coreos.com/v1
      kind: PrometheusRule
      metadata:
        name: prometheus-community-kube-prometheus-operator
      spec:
        groups:
        - name: prometheus-operator
          rules:
          - alert: PrometheusOperatorListErrors
            annotations:
              description: Errors while performing List operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace.
              runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorlisterrors
              summary: Errors while performing list operations in controller.
            expr: (sum by (cluster,controller,namespace) (rate(prometheus_operator_list_operations_failed_total{job="prometheus-community-kube-operator",namespace="${var.namespace}"}[10m])) / sum by (cluster,controller,namespace) (rate(prometheus_operator_list_operations_total{job="prometheus-community-kube-operator",namespace="${var.namespace}"}[10m]))) > 0.4
            for: 15m
            labels:
              severity: warning
          - alert: PrometheusOperatorWatchErrors
            annotations:
              description: Errors while performing watch operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace.
              runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorwatcherrors
              summary: Errors while performing watch operations in controller.
            expr: (sum by (cluster,controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-community-kube-operator",namespace="${var.namespace}"}[5m])) / sum by (cluster,controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-community-kube-operator",namespace="${var.namespace}"}[5m]))) > 0.4
            for: 15m
            labels:
              severity: warning
          - alert: PrometheusOperatorSyncFailed
            annotations:
              description: Controller {{ $labels.controller }} in {{ $labels.namespace }} namespace fails to reconcile {{ $value }} objects.
              runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorsyncfailed
              summary: Last controller reconciliation failed
            expr: min_over_time(prometheus_operator_syncs{status="failed",job="prometheus-community-kube-operator",namespace="${var.namespace}"}[5m]) > 0
            for: 10m
            labels:
              severity: warning
          - alert: PrometheusOperatorReconcileErrors
            annotations:
              description: '{{ $value | humanizePercentage }} of reconciling operations failed for {{ $labels.controller }} controller in {{ $labels.namespace }} namespace.'
              runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorreconcileerrors
              summary: Errors while reconciling objects.
            expr: (sum by (cluster,controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-community-kube-operator",namespace="${var.namespace}"}[5m]))) / (sum by (cluster,controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-community-kube-operator",namespace="${var.namespace}"}[5m]))) > 0.1
            for: 10m
            labels:
              severity: warning
          - alert: PrometheusOperatorStatusUpdateErrors
            annotations:
              description: '{{ $value | humanizePercentage }} of status update operations failed for {{ $labels.controller }} controller in {{ $labels.namespace }} namespace.'
              runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorstatusupdateerrors
              summary: Errors while updating objects status.
            expr: (sum by (cluster,controller,namespace) (rate(prometheus_operator_status_update_errors_total{job="prometheus-community-kube-operator",namespace="${var.namespace}"}[5m]))) / (sum by (cluster,controller,namespace) (rate(prometheus_operator_status_update_operations_total{job="prometheus-community-kube-operator",namespace="${var.namespace}"}[5m]))) > 0.1
            for: 10m
            labels:
              severity: warning
          - alert: PrometheusOperatorNodeLookupErrors
            annotations:
              description: Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace.
              runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatornodelookuperrors
              summary: Errors while reconciling Prometheus.
            expr: rate(prometheus_operator_node_address_lookup_errors_total{job="prometheus-community-kube-operator",namespace="${var.namespace}"}[5m]) > 0.1
            for: 10m
            labels:
              severity: warning
          - alert: PrometheusOperatorNotReady
            annotations:
              description: Prometheus operator in {{ $labels.namespace }} namespace isn't ready to reconcile {{ $labels.controller }} resources.
              runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatornotready
              summary: Prometheus operator not ready
            expr: min by (cluster,controller,namespace) (max_over_time(prometheus_operator_ready{job="prometheus-community-kube-operator",namespace="${var.namespace}"}[5m]) == 0)
            for: 5m
            labels:
              severity: warning
          - alert: PrometheusOperatorRejectedResources
            annotations:
              description: Prometheus operator in {{ $labels.namespace }} namespace rejected {{ printf "%0.0f" $value }} {{ $labels.controller }}/{{ $labels.resource }} resources.
              runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorrejectedresources
              summary: Resources rejected by Prometheus operator
            expr: min_over_time(prometheus_operator_managed_resources{state="rejected",job="prometheus-community-kube-operator",namespace="${var.namespace}"}[5m]) > 0
            for: 5m
            labels:
              severity: warning
    EOF
  }
}
|
||||
|
||||
# Overlay for the manifests whose file name contains "ClusterRole" or
# "WebhookConfiguration". No namespace is set on this overlay; only the
# fields that reference the operator namespace are patched to var.namespace.
data "kustomization_overlay" "data_no_ns" {
  common_labels = local.common-labels
  resources = [
    for file in fileset(path.module, "*.yaml") : file
    if file != "index.yaml" && (length(regexall("ClusterRole", file)) > 0 || length(regexall("WebhookConfiguration", file)) > 0)
  ]

  # Point the binding's ServiceAccount subject at var.namespace.
  patches {
    target {
      kind = "ClusterRoleBinding"
      name = "prometheus-community-kube-operator"
    }
    patch = local.rb-patch
  }

  # "~1" is the JSON-pointer escape for "/" inside the annotation keys.
  patches {
    target {
      kind = "MutatingWebhookConfiguration"
      name = "prometheus-community-kube-admission"
    }
    patch = <<-EOF
      - op: replace
        path: /webhooks/0/clientConfig/service/namespace
        value: "${var.namespace}"
      - op: replace
        path: /metadata/annotations/certmanager.k8s.io~1inject-ca-from
        value: "${var.namespace}/prometheus-community-kube-admission"
      - op: replace
        path: /metadata/annotations/cert-manager.io~1inject-ca-from
        value: "${var.namespace}/prometheus-community-kube-admission"
    EOF
  }

  # Same three replacements as for the mutating configuration.
  patches {
    target {
      kind = "ValidatingWebhookConfiguration"
      name = "prometheus-community-kube-admission"
    }
    patch = <<-EOF
      - op: replace
        path: /webhooks/0/clientConfig/service/namespace
        value: "${var.namespace}"
      - op: replace
        path: /metadata/annotations/certmanager.k8s.io~1inject-ca-from
        value: "${var.namespace}/prometheus-community-kube-admission"
      - op: replace
        path: /metadata/annotations/cert-manager.io~1inject-ca-from
        value: "${var.namespace}/prometheus-community-kube-admission"
    EOF
  }
}
|
||||
|
||||
|
||||
58
monitor/prometheus/index.yaml
Normal file
58
monitor/prometheus/index.yaml
Normal file
@@ -0,0 +1,58 @@
|
||||
---
# Component descriptor for the Prometheus operator: declares the "images"
# option (default, example and schema) plus the providers the component needs.
apiVersion: vinyl.solidite.fr/v1beta1
kind: Component
category: monitor
metadata:
  name: prometheus
  description: Prometheus operator
options:
  images:
    # The operator tag is kept in sync with the image pinned in the Deployment
    # manifest and with its --prometheus-config-reloader argument (v0.72.0).
    default:
      operator:
        pullPolicy: IfNotPresent
        registry: quay.io
        repository: prometheus-operator/prometheus-operator
        tag: v0.72.0
    examples:
      - operator:
          pullPolicy: IfNotPresent
          registry: quay.io
          repository: prometheus-operator/prometheus-operator
          tag: v0.72.0
    properties:
      operator:
        default:
          pullPolicy: IfNotPresent
          registry: quay.io
          repository: prometheus-operator/prometheus-operator
          tag: v0.72.0
        properties:
          pullPolicy:
            default: IfNotPresent
            enum:
              - Always
              - Never
              - IfNotPresent
            type: string
          registry:
            default: quay.io
            type: string
          repository:
            default: prometheus-operator/prometheus-operator
            type: string
          tag:
            default: v0.72.0
            type: string
        type: object
    type: object
dependencies: []
providers:
  kubernetes: true
  authentik: null
  kubectl: true
  postgresql: null
  mysql: null
  restapi: null
  http: null
  gitea: null
tfaddtype: null
|
||||
@@ -0,0 +1,92 @@
|
||||
# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/prometheus-operator.yaml
# Alerting rules about the operator itself. Every expression selects the
# series of job "prometheus-community-kube-operator" in namespace
# "vynil-monitor".
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: prometheus-community-kube-prometheus-operator
  namespace: vynil-monitor
  labels:
    app: kube-prometheus-stack
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/instance: prometheus-community
    app.kubernetes.io/version: "57.0.3"
    app.kubernetes.io/part-of: kube-prometheus-stack
    chart: kube-prometheus-stack-57.0.3
    release: "prometheus-community"
    heritage: "Helm"
spec:
  groups:
    - name: prometheus-operator
      rules:
        - alert: PrometheusOperatorListErrors
          annotations:
            description: Errors while performing List operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace.
            runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorlisterrors
            summary: Errors while performing list operations in controller.
          expr: (sum by (cluster,controller,namespace) (rate(prometheus_operator_list_operations_failed_total{job="prometheus-community-kube-operator",namespace="vynil-monitor"}[10m])) / sum by (cluster,controller,namespace) (rate(prometheus_operator_list_operations_total{job="prometheus-community-kube-operator",namespace="vynil-monitor"}[10m]))) > 0.4
          for: 15m
          labels:
            severity: warning
        - alert: PrometheusOperatorWatchErrors
          annotations:
            description: Errors while performing watch operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace.
            runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorwatcherrors
            summary: Errors while performing watch operations in controller.
          expr: (sum by (cluster,controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-community-kube-operator",namespace="vynil-monitor"}[5m])) / sum by (cluster,controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-community-kube-operator",namespace="vynil-monitor"}[5m]))) > 0.4
          for: 15m
          labels:
            severity: warning
        - alert: PrometheusOperatorSyncFailed
          annotations:
            description: Controller {{ $labels.controller }} in {{ $labels.namespace }} namespace fails to reconcile {{ $value }} objects.
            runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorsyncfailed
            summary: Last controller reconciliation failed
          expr: min_over_time(prometheus_operator_syncs{status="failed",job="prometheus-community-kube-operator",namespace="vynil-monitor"}[5m]) > 0
          for: 10m
          labels:
            severity: warning
        - alert: PrometheusOperatorReconcileErrors
          annotations:
            description: '{{ $value | humanizePercentage }} of reconciling operations failed for {{ $labels.controller }} controller in {{ $labels.namespace }} namespace.'
            runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorreconcileerrors
            summary: Errors while reconciling objects.
          expr: (sum by (cluster,controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-community-kube-operator",namespace="vynil-monitor"}[5m]))) / (sum by (cluster,controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-community-kube-operator",namespace="vynil-monitor"}[5m]))) > 0.1
          for: 10m
          labels:
            severity: warning
        - alert: PrometheusOperatorStatusUpdateErrors
          annotations:
            description: '{{ $value | humanizePercentage }} of status update operations failed for {{ $labels.controller }} controller in {{ $labels.namespace }} namespace.'
            runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorstatusupdateerrors
            summary: Errors while updating objects status.
          expr: (sum by (cluster,controller,namespace) (rate(prometheus_operator_status_update_errors_total{job="prometheus-community-kube-operator",namespace="vynil-monitor"}[5m]))) / (sum by (cluster,controller,namespace) (rate(prometheus_operator_status_update_operations_total{job="prometheus-community-kube-operator",namespace="vynil-monitor"}[5m]))) > 0.1
          for: 10m
          labels:
            severity: warning
        - alert: PrometheusOperatorNodeLookupErrors
          annotations:
            description: Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace.
            runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatornodelookuperrors
            summary: Errors while reconciling Prometheus.
          expr: rate(prometheus_operator_node_address_lookup_errors_total{job="prometheus-community-kube-operator",namespace="vynil-monitor"}[5m]) > 0.1
          for: 10m
          labels:
            severity: warning
        - alert: PrometheusOperatorNotReady
          annotations:
            description: Prometheus operator in {{ $labels.namespace }} namespace isn't ready to reconcile {{ $labels.controller }} resources.
            runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatornotready
            summary: Prometheus operator not ready
          expr: min by (cluster,controller,namespace) (max_over_time(prometheus_operator_ready{job="prometheus-community-kube-operator",namespace="vynil-monitor"}[5m]) == 0)
          for: 5m
          labels:
            severity: warning
        - alert: PrometheusOperatorRejectedResources
          annotations:
            description: Prometheus operator in {{ $labels.namespace }} namespace rejected {{ printf "%0.0f" $value }} {{ $labels.controller }}/{{ $labels.resource }} resources.
            runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorrejectedresources
            summary: Resources rejected by Prometheus operator
          expr: min_over_time(prometheus_operator_managed_resources{state="rejected",job="prometheus-community-kube-operator",namespace="vynil-monitor"}[5m]) > 0
          for: 5m
          labels:
            severity: warning
|
||||
@@ -0,0 +1,38 @@
|
||||
# Source: kube-prometheus-stack/templates/prometheus-operator/servicemonitor.yaml
# Scrapes the operator's "https" port; the server certificate is verified
# against ca.crt from the prometheus-community-kube-admission secret.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: prometheus-community-kube-operator
  namespace: vynil-monitor
  labels:
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/instance: prometheus-community
    app.kubernetes.io/version: "57.0.3"
    app.kubernetes.io/part-of: kube-prometheus-stack
    chart: kube-prometheus-stack-57.0.3
    release: "prometheus-community"
    heritage: "Helm"
    app: kube-prometheus-stack-operator
    app.kubernetes.io/name: kube-prometheus-stack-prometheus-operator
    app.kubernetes.io/component: prometheus-operator
spec:
  endpoints:
    - port: https
      scheme: https
      tlsConfig:
        serverName: prometheus-community-kube-operator
        ca:
          secret:
            name: prometheus-community-kube-admission
            key: ca.crt
            optional: false
      honorLabels: true
  selector:
    matchLabels:
      app: kube-prometheus-stack-operator
      release: "prometheus-community"
  namespaceSelector:
    matchNames:
      - "vynil-monitor"
|
||||
@@ -0,0 +1,25 @@
|
||||
# Source: kube-prometheus-stack/templates/prometheus-operator/clusterrolebinding.yaml
# Binds the operator ClusterRole to the operator ServiceAccount in the
# vynil-monitor namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus-community-kube-operator
  labels:
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/instance: prometheus-community
    app.kubernetes.io/version: "57.0.3"
    app.kubernetes.io/part-of: kube-prometheus-stack
    chart: kube-prometheus-stack-57.0.3
    release: "prometheus-community"
    heritage: "Helm"
    app: kube-prometheus-stack-operator
    app.kubernetes.io/name: kube-prometheus-stack-prometheus-operator
    app.kubernetes.io/component: prometheus-operator
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus-community-kube-operator
subjects:
  - kind: ServiceAccount
    name: prometheus-community-kube-operator
    namespace: vynil-monitor
|
||||
@@ -0,0 +1,108 @@
|
||||
# Source: kube-prometheus-stack/templates/prometheus-operator/clusterrole.yaml
# Cluster-wide permissions granted to the operator.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus-community-kube-operator
  labels:
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/instance: prometheus-community
    app.kubernetes.io/version: "57.0.3"
    app.kubernetes.io/part-of: kube-prometheus-stack
    chart: kube-prometheus-stack-57.0.3
    release: "prometheus-community"
    heritage: "Helm"
    app: kube-prometheus-stack-operator
    app.kubernetes.io/name: kube-prometheus-stack-prometheus-operator
    app.kubernetes.io/component: prometheus-operator
rules:
  # Full access to the monitoring.coreos.com custom resources.
  - apiGroups:
      - monitoring.coreos.com
    resources:
      - alertmanagers
      - alertmanagers/finalizers
      - alertmanagers/status
      - alertmanagerconfigs
      - prometheuses
      - prometheuses/finalizers
      - prometheuses/status
      - prometheusagents
      - prometheusagents/finalizers
      - prometheusagents/status
      - thanosrulers
      - thanosrulers/finalizers
      - thanosrulers/status
      - scrapeconfigs
      - servicemonitors
      - podmonitors
      - probes
      - prometheusrules
    verbs:
      - '*'
  - apiGroups:
      - apps
    resources:
      - statefulsets
    verbs:
      - '*'
  - apiGroups:
      - ""
    resources:
      - configmaps
      - secrets
    verbs:
      - '*'
  - apiGroups:
      - ""
    resources:
      - pods
    verbs:
      - list
      - delete
  - apiGroups:
      - ""
    resources:
      - services
      - services/finalizers
      - endpoints
    verbs:
      - get
      - create
      - update
      - delete
  - apiGroups:
      - ""
    resources:
      - nodes
    verbs:
      - list
      - watch
  - apiGroups:
      - ""
    resources:
      - namespaces
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - ""
    resources:
      - events
    verbs:
      - patch
      - create
  - apiGroups:
      - networking.k8s.io
    resources:
      - ingresses
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - storage.k8s.io
    resources:
      - storageclasses
    verbs:
      - get
|
||||
45
monitor/prometheus/ressources_no_ns.tf
Normal file
45
monitor/prometheus/ressources_no_ns.tf
Normal file
@@ -0,0 +1,45 @@
|
||||
|
||||
# First loop through the resources in ids_prio[0] of the data_no_ns overlay.
# Manifests whose group/kind is "_/Secret" are wrapped in sensitive().
resource "kustomization_resource" "pre_no_ns" {
  for_each = data.kustomization_overlay.data_no_ns.ids_prio[0]

  manifest = (
    contains(["_/Secret"], regex("(?P<group_kind>.*/.*)/.*/.*", each.value)["group_kind"])
    ? sensitive(data.kustomization_overlay.data_no_ns.manifests[each.value])
    : data.kustomization_overlay.data_no_ns.manifests[each.value]
  )
}
|
||||
|
||||
# Then loop through the resources in ids_prio[1] of the data_no_ns overlay,
# with an explicit depends_on on kustomization_resource.pre_no_ns.
# wait = true with create/update timeouts of 5 minutes: wait for any
# deployment or daemonset to become ready.
# Manifests whose group/kind is "_/Secret" are wrapped in sensitive().
resource "kustomization_resource" "main_no_ns" {
  for_each = data.kustomization_overlay.data_no_ns.ids_prio[1]

  manifest = (
    contains(["_/Secret"], regex("(?P<group_kind>.*/.*)/.*/.*", each.value)["group_kind"])
    ? sensitive(data.kustomization_overlay.data_no_ns.manifests[each.value])
    : data.kustomization_overlay.data_no_ns.manifests[each.value]
  )
  wait = true
  timeouts {
    create = "5m"
    update = "5m"
  }

  depends_on = [kustomization_resource.pre_no_ns]
}
|
||||
|
||||
# Finally, loop through the resources in ids_prio[2] of the data_no_ns
# overlay, with an explicit depends_on on kustomization_resource.main_no_ns.
# Manifests whose group/kind is "_/Secret" are wrapped in sensitive().
resource "kustomization_resource" "post_no_ns" {
  for_each = data.kustomization_overlay.data_no_ns.ids_prio[2]

  manifest = (
    contains(["_/Secret"], regex("(?P<group_kind>.*/.*)/.*/.*", each.value)["group_kind"])
    ? sensitive(data.kustomization_overlay.data_no_ns.manifests[each.value])
    : data.kustomization_overlay.data_no_ns.manifests[each.value]
  )

  depends_on = [kustomization_resource.main_no_ns]
}
|
||||
@@ -0,0 +1,20 @@
|
||||
---
# Source: kube-prometheus-stack/templates/prometheus-operator/serviceaccount.yaml
# Identity the operator pods run as (referenced by the Deployment's
# serviceAccountName and by the ClusterRoleBinding subject).
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus-community-kube-operator
  namespace: vynil-monitor
  labels:
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/instance: prometheus-community
    app.kubernetes.io/version: "57.0.3"
    app.kubernetes.io/part-of: kube-prometheus-stack
    chart: kube-prometheus-stack-57.0.3
    release: "prometheus-community"
    heritage: "Helm"
    app: kube-prometheus-stack-operator
    app.kubernetes.io/name: kube-prometheus-stack-prometheus-operator
    app.kubernetes.io/component: prometheus-operator
automountServiceAccountToken: true
|
||||
@@ -0,0 +1,27 @@
|
||||
# Source: kube-prometheus-stack/templates/prometheus-operator/service.yaml
# ClusterIP service exposing the operator's named "https" container port on
# port 443.
apiVersion: v1
kind: Service
metadata:
  name: prometheus-community-kube-operator
  namespace: vynil-monitor
  labels:
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/instance: prometheus-community
    app.kubernetes.io/version: "57.0.3"
    app.kubernetes.io/part-of: kube-prometheus-stack
    chart: kube-prometheus-stack-57.0.3
    release: "prometheus-community"
    heritage: "Helm"
    app: kube-prometheus-stack-operator
    app.kubernetes.io/name: kube-prometheus-stack-prometheus-operator
    app.kubernetes.io/component: prometheus-operator
spec:
  ports:
    - name: https
      port: 443
      targetPort: https
  selector:
    app: kube-prometheus-stack-operator
    release: "prometheus-community"
  type: "ClusterIP"
|
||||
Reference in New Issue
Block a user