fix
@@ -1,212 +0,0 @@
locals {
  authentik_url   = "http://authentik.${var.domain}-auth.svc"
  authentik_token = data.kubernetes_secret_v1.authentik.data["AUTHENTIK_BOOTSTRAP_TOKEN"]
  common-labels = {
    "vynil.solidite.fr/owner-name"      = var.instance
    "vynil.solidite.fr/owner-namespace" = var.namespace
    "vynil.solidite.fr/owner-category"  = var.category
    "vynil.solidite.fr/owner-component" = var.component
    "app.kubernetes.io/managed-by"      = "vynil"
    "app.kubernetes.io/instance"        = var.instance
  }
  rb-patch = <<-EOF
    - op: replace
      path: /subjects/0/namespace
      value: "${var.namespace}"
  EOF
}

data "kubernetes_secret_v1" "authentik" {
  metadata {
    name      = "authentik"
    namespace = "${var.domain}-auth"
  }
}

data "kustomization_overlay" "data" {
  common_labels = local.common-labels
  namespace     = var.namespace
  resources     = [for file in fileset(path.module, "*.yaml") : file if file != "index.yaml" && length(regexall("ClusterRole", file)) < 1 && length(regexall("Service_prometheus", file)) < 1]
  patches {
    target {
      kind = "ConfigMap"
      name = "prometheus-kube-prometheus-grafana-datasource"
    }
    patch = <<-EOF
      apiVersion: v1
      kind: ConfigMap
      metadata:
        name: prometheus-kube-prometheus-grafana-datasource
      data:
        datasource.yaml: |-
          apiVersion: 1
          datasources:
          - name: Prometheus
            type: prometheus
            uid: prometheus
            url: http://${var.component}-${var.instance}.${var.namespace}:9090/
            access: proxy
            isDefault: false
            jsonData:
              httpMethod: POST
              timeInterval: 30s
    EOF
  }
  patches {
    target {
      kind = "ServiceMonitor"
      name = "prometheus-kube-prometheus-prometheus"
    }
    patch = <<-EOF
      - op: replace
        path: /spec/namespaceSelector/matchNames/0
        value: "${var.namespace}"
    EOF
  }

  patches {
    target {
      kind = "PrometheusRule"
      name = "prometheus-kube-prometheus-prometheus"
    }
    patch = <<-EOF
      apiVersion: monitoring.coreos.com/v1
      kind: PrometheusRule
      metadata:
        name: prometheus-kube-prometheus-prometheus
      spec:
        groups:
        - name: prometheus
          rules:
          - alert: PrometheusBadConfig
            expr: |-
              # Without max_over_time, failed scrapes could create false negatives, see
              # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
              max_over_time(prometheus_config_last_reload_successful{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) == 0
          - alert: PrometheusSDRefreshFailure
            expr: increase(prometheus_sd_refresh_failures_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[10m]) > 0
          - alert: PrometheusNotificationQueueRunningFull
            expr: |-
              # Without min_over_time, failed scrapes could create false negatives, see
              # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
              (
                predict_linear(prometheus_notifications_queue_length{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m], 60 * 30)
              >
                min_over_time(prometheus_notifications_queue_capacity{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m])
              )
          - alert: PrometheusErrorSendingAlertsToSomeAlertmanagers
            expr: |-
              (
                rate(prometheus_notifications_errors_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m])
              /
                rate(prometheus_notifications_sent_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m])
              )
              * 100
              > 1
          - alert: PrometheusNotConnectedToAlertmanagers
            expr: |-
              # Without max_over_time, failed scrapes could create false negatives, see
              # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
              max_over_time(prometheus_notifications_alertmanagers_discovered{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) < 1
          - alert: PrometheusTSDBReloadsFailing
            expr: increase(prometheus_tsdb_reloads_failures_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[3h]) > 0
          - alert: PrometheusTSDBCompactionsFailing
            expr: increase(prometheus_tsdb_compactions_failed_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[3h]) > 0
          - alert: PrometheusNotIngestingSamples
            expr: |-
              (
                rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) <= 0
              and
                (
                  sum without(scrape_job) (prometheus_target_metadata_cache_entries{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}) > 0
                or
                  sum without(rule_group) (prometheus_rule_group_rules{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}) > 0
                )
              )
          - alert: PrometheusDuplicateTimestamps
            expr: rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) > 0
          - alert: PrometheusOutOfOrderTimestamps
            expr: rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) > 0
          - alert: PrometheusRemoteStorageFailures
            expr: |-
              (
                (rate(prometheus_remote_storage_failed_samples_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]))
              /
                (
                  (rate(prometheus_remote_storage_failed_samples_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]))
                +
                  (rate(prometheus_remote_storage_succeeded_samples_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) or rate(prometheus_remote_storage_samples_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]))
                )
              )
              * 100
              > 1
          - alert: PrometheusRemoteWriteBehind
            expr: |-
              # Without max_over_time, failed scrapes could create false negatives, see
              # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
              (
                max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m])
              - ignoring(remote_name, url) group_right
                max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m])
              )
              > 120
          - alert: PrometheusRemoteWriteDesiredShards
            expr: |-
              # Without max_over_time, failed scrapes could create false negatives, see
              # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
              (
                max_over_time(prometheus_remote_storage_shards_desired{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m])
              >
                max_over_time(prometheus_remote_storage_shards_max{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m])
              )
          - alert: PrometheusRuleFailures
            expr: increase(prometheus_rule_evaluation_failures_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) > 0
          - alert: PrometheusMissingRuleEvaluations
            expr: increase(prometheus_rule_group_iterations_missed_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) > 0
          - alert: PrometheusTargetLimitHit
            expr: increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) > 0
          - alert: PrometheusLabelLimitHit
            expr: increase(prometheus_target_scrape_pool_exceeded_label_limits_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) > 0
          - alert: PrometheusScrapeBodySizeLimitHit
            expr: increase(prometheus_target_scrapes_exceeded_body_size_limit_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) > 0
          - alert: PrometheusScrapeSampleLimitHit
            expr: increase(prometheus_target_scrapes_exceeded_sample_limit_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) > 0
          - alert: PrometheusTargetSyncFailure
            expr: increase(prometheus_target_sync_failed_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[30m]) > 0
          - alert: PrometheusHighQueryLoad
            expr: avg_over_time(prometheus_engine_queries{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) / max_over_time(prometheus_engine_queries_concurrent_max{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) > 0.8
          - alert: PrometheusErrorSendingAlertsToAnyAlertmanager
            expr: |-
              min without (alertmanager) (
                rate(prometheus_notifications_errors_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}",alertmanager!~``}[5m])
              /
                rate(prometheus_notifications_sent_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}",alertmanager!~``}[5m])
              )
              * 100
              > 3
    EOF
  }
  patches {
    target {
      kind = "ServiceMonitor"
      name = "prometheus-community-prometheus-node-exporter"
    }
    patch = <<-EOF
      - op: replace
        path: /spec/selector/matchLabels/app.kubernetes.io~1instance
        value: "${var.instance}"
    EOF
  }
}

data "kustomization_overlay" "data_no_ns" {
  resources = [for file in fileset(path.module, "*.yaml") : file if (length(regexall("ClusterRole", file)) > 0 || length(regexall("Service_prometheus", file)) > 0)]

  patches {
    target {
      kind = "ClusterRoleBinding"
      name = "prometheus-kube-prometheus-prometheus"
    }
    patch = local.rb-patch
  }
}
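The namespaced IDs produced by data.kustomization_overlay.data are presumably applied with the same three-phase ids_prio pattern this commit uses for data_no_ns further down; the apply loops for the namespaced overlay most likely live in one of the diffs suppressed below. A minimal sketch of the first phase under that assumption (the resource name "pre" is hypothetical):

resource "kustomization_resource" "pre" {
  for_each = data.kustomization_overlay.data.ids_prio[0]

  # Secrets are wrapped in sensitive() so their contents stay out of plan output.
  manifest = (
    contains(["_/Secret"], regex("(?P<group_kind>.*/.*)/.*/.*", each.value)["group_kind"])
    ? sensitive(data.kustomization_overlay.data.manifests[each.value])
    : data.kustomization_overlay.data.manifests[each.value]
  )
}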
@@ -1,125 +0,0 @@
---
apiVersion: vinyl.solidite.fr/v1beta1
kind: Component
category: monitor
metadata:
  name: prometheus
description: null
options:
  domain_name:
    default: your_company.com
    examples:
    - your_company.com
    type: string
  domain:
    default: your-company
    examples:
    - your-company
    type: string
  replicas:
    default: 1
    examples:
    - 1
    type: integer
  issuer:
    default: letsencrypt-prod
    examples:
    - letsencrypt-prod
    type: string
  images:
    default:
      prometheus:
        pullPolicy: IfNotPresent
        registry: quay.io
        repository: prometheus/prometheus
        tag: v2.49.1
    examples:
    - prometheus:
        pullPolicy: IfNotPresent
        registry: quay.io
        repository: prometheus/prometheus
        tag: v2.49.1
    properties:
      prometheus:
        default:
          pullPolicy: IfNotPresent
          registry: quay.io
          repository: prometheus/prometheus
          tag: v2.49.1
        properties:
          pullPolicy:
            default: IfNotPresent
            enum:
            - Always
            - Never
            - IfNotPresent
            type: string
          registry:
            default: quay.io
            type: string
          repository:
            default: prometheus/prometheus
            type: string
          tag:
            default: v2.49.1
            type: string
        type: object
    type: object
  sub_domain:
    default: prometheus
    examples:
    - prometheus
    type: string
  ingress_class:
    default: traefik
    examples:
    - traefik
    type: string
  logLevel:
    default: info
    examples:
    - info
    type: string
  app_group:
    default: monitor
    examples:
    - monitor
    type: string
  listenLocal:
    default: false
    examples:
    - false
    type: boolean
  enableAdminAPI:
    default: false
    examples:
    - false
    type: boolean
  shards:
    default: 1
    examples:
    - 1
    type: integer
  retention:
    default: 10d
    examples:
    - 10d
    type: string
  alertmanager:
    default: alertmanager-alertmanager
    examples:
    - alertmanager-alertmanager
    type: string
dependencies:
- dist: null
  category: share
  component: authentik-forward
providers:
  kubernetes: true
  authentik: true
  kubectl: true
  postgresql: null
  restapi: true
  http: true
  gitea: null
tfaddtype: null
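The option names in this schema line up one-for-one with the Terraform variables the HCL in this commit consumes (var.replicas, var.shards, var.retention, var.images, and so on). Assuming Vynil materializes each option as an ordinary Terraform variable, a hypothetical sketch for two of them (not part of the commit; names and defaults taken from the options above):

# Hypothetical variable declarations mirroring the component schema.
variable "replicas" {
  type    = number
  default = 1
}

variable "retention" {
  type    = string
  default = "10d"
}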
@@ -1,32 +0,0 @@
# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubelet.rules.yaml
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: prometheus-kube-prometheus-kubelet.rules
  namespace: vynil-monitor
  labels:
    app: kube-prometheus-stack
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/instance: prometheus
    app.kubernetes.io/version: "56.1.0"
    app.kubernetes.io/part-of: kube-prometheus-stack
    chart: kube-prometheus-stack-56.1.0
    release: "prometheus"
    heritage: "Helm"
spec:
  groups:
  - name: kubelet.rules
    rules:
    - expr: histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (cluster, instance, le) * on (cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"})
      labels:
        quantile: '0.99'
      record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
    - expr: histogram_quantile(0.9, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (cluster, instance, le) * on (cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"})
      labels:
        quantile: '0.9'
      record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
    - expr: histogram_quantile(0.5, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (cluster, instance, le) * on (cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"})
      labels:
        quantile: '0.5'
      record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
@@ -1,29 +0,0 @@
# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-kube-proxy.yaml
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: prometheus-kube-prometheus-kubernetes-system-kube-proxy
  namespace: vynil-monitor
  labels:
    app: kube-prometheus-stack
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/instance: prometheus
    app.kubernetes.io/version: "56.1.0"
    app.kubernetes.io/part-of: kube-prometheus-stack
    chart: kube-prometheus-stack-56.1.0
    release: "prometheus"
    heritage: "Helm"
spec:
  groups:
  - name: kubernetes-system-kube-proxy
    rules:
    - alert: KubeProxyDown
      annotations:
        description: KubeProxy has disappeared from Prometheus target discovery.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeproxydown
        summary: Target disappeared from Prometheus target discovery.
      expr: absent(up{job="kube-proxy"} == 1)
      for: 15m
      labels:
        severity: critical
@@ -1,280 +0,0 @@
# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/prometheus.yaml
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: prometheus-kube-prometheus-prometheus
  namespace: vynil-monitor
  labels:
    app: kube-prometheus-stack
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/instance: prometheus
    app.kubernetes.io/version: "56.1.0"
    app.kubernetes.io/part-of: kube-prometheus-stack
    chart: kube-prometheus-stack-56.1.0
    release: "prometheus"
    heritage: "Helm"
spec:
  groups:
  - name: prometheus
    rules:
    - alert: PrometheusBadConfig
      annotations:
        description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to reload its configuration.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusbadconfig
        summary: Failed Prometheus configuration reload.
      expr: |-
        # Without max_over_time, failed scrapes could create false negatives, see
        # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
        max_over_time(prometheus_config_last_reload_successful{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) == 0
      for: 10m
      labels:
        severity: critical
    - alert: PrometheusSDRefreshFailure
      annotations:
        description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to refresh SD with mechanism {{$labels.mechanism}}.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheussdrefreshfailure
        summary: Failed Prometheus SD refresh.
      expr: increase(prometheus_sd_refresh_failures_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[10m]) > 0
      for: 20m
      labels:
        severity: warning
    - alert: PrometheusNotificationQueueRunningFull
      annotations:
        description: Alert notification queue of Prometheus {{$labels.namespace}}/{{$labels.pod}} is running full.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusnotificationqueuerunningfull
        summary: Prometheus alert notification queue predicted to run full in less than 30m.
      expr: |-
        # Without min_over_time, failed scrapes could create false negatives, see
        # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
        (
          predict_linear(prometheus_notifications_queue_length{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m], 60 * 30)
        >
          min_over_time(prometheus_notifications_queue_capacity{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m])
        )
      for: 15m
      labels:
        severity: warning
    - alert: PrometheusErrorSendingAlertsToSomeAlertmanagers
      annotations:
        description: '{{ printf "%.1f" $value }}% errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to Alertmanager {{$labels.alertmanager}}.'
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuserrorsendingalertstosomealertmanagers
        summary: Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager.
      expr: |-
        (
          rate(prometheus_notifications_errors_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m])
        /
          rate(prometheus_notifications_sent_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m])
        )
        * 100
        > 1
      for: 15m
      labels:
        severity: warning
    - alert: PrometheusNotConnectedToAlertmanagers
      annotations:
        description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not connected to any Alertmanagers.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusnotconnectedtoalertmanagers
        summary: Prometheus is not connected to any Alertmanagers.
      expr: |-
        # Without max_over_time, failed scrapes could create false negatives, see
        # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
        max_over_time(prometheus_notifications_alertmanagers_discovered{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) < 1
      for: 10m
      labels:
        severity: warning
    - alert: PrometheusTSDBReloadsFailing
      annotations:
        description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected {{$value | humanize}} reload failures over the last 3h.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustsdbreloadsfailing
        summary: Prometheus has issues reloading blocks from disk.
      expr: increase(prometheus_tsdb_reloads_failures_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[3h]) > 0
      for: 4h
      labels:
        severity: warning
    - alert: PrometheusTSDBCompactionsFailing
      annotations:
        description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected {{$value | humanize}} compaction failures over the last 3h.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustsdbcompactionsfailing
        summary: Prometheus has issues compacting blocks.
      expr: increase(prometheus_tsdb_compactions_failed_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[3h]) > 0
      for: 4h
      labels:
        severity: warning
    - alert: PrometheusNotIngestingSamples
      annotations:
        description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not ingesting samples.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusnotingestingsamples
        summary: Prometheus is not ingesting samples.
      expr: |-
        (
          rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) <= 0
        and
          (
            sum without(scrape_job) (prometheus_target_metadata_cache_entries{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}) > 0
          or
            sum without(rule_group) (prometheus_rule_group_rules{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}) > 0
          )
        )
      for: 10m
      labels:
        severity: warning
    - alert: PrometheusDuplicateTimestamps
      annotations:
        description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping {{ printf "%.4g" $value }} samples/s with different values but duplicated timestamp.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusduplicatetimestamps
        summary: Prometheus is dropping samples with duplicate timestamps.
      expr: rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) > 0
      for: 10m
      labels:
        severity: warning
    - alert: PrometheusOutOfOrderTimestamps
      annotations:
        description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping {{ printf "%.4g" $value }} samples/s with timestamps arriving out of order.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusoutofordertimestamps
        summary: Prometheus drops samples with out-of-order timestamps.
      expr: rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) > 0
      for: 10m
      labels:
        severity: warning
    - alert: PrometheusRemoteStorageFailures
      annotations:
        description: Prometheus {{$labels.namespace}}/{{$labels.pod}} failed to send {{ printf "%.1f" $value }}% of the samples to {{ $labels.remote_name}}:{{ $labels.url }}
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotestoragefailures
        summary: Prometheus fails to send samples to remote storage.
      expr: |-
        (
          (rate(prometheus_remote_storage_failed_samples_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]))
        /
          (
            (rate(prometheus_remote_storage_failed_samples_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]))
          +
            (rate(prometheus_remote_storage_succeeded_samples_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) or rate(prometheus_remote_storage_samples_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]))
          )
        )
        * 100
        > 1
      for: 15m
      labels:
        severity: critical
    - alert: PrometheusRemoteWriteBehind
      annotations:
        description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write is {{ printf "%.1f" $value }}s behind for {{ $labels.remote_name}}:{{ $labels.url }}.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotewritebehind
        summary: Prometheus remote write is behind.
      expr: |-
        # Without max_over_time, failed scrapes could create false negatives, see
        # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
        (
          max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m])
        - ignoring(remote_name, url) group_right
          max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m])
        )
        > 120
      for: 15m
      labels:
        severity: critical
    - alert: PrometheusRemoteWriteDesiredShards
      annotations:
        description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write desired shards calculation wants to run {{ $value }} shards for queue {{ $labels.remote_name}}:{{ $labels.url }}, which is more than the max of {{ printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}` $labels.instance | query | first | value }}.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotewritedesiredshards
        summary: Prometheus remote write desired shards calculation wants to run more than configured max shards.
      expr: |-
        # Without max_over_time, failed scrapes could create false negatives, see
        # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
        (
          max_over_time(prometheus_remote_storage_shards_desired{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m])
        >
          max_over_time(prometheus_remote_storage_shards_max{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m])
        )
      for: 15m
      labels:
        severity: warning
    - alert: PrometheusRuleFailures
      annotations:
        description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to evaluate {{ printf "%.0f" $value }} rules in the last 5m.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusrulefailures
        summary: Prometheus is failing rule evaluations.
      expr: increase(prometheus_rule_evaluation_failures_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) > 0
      for: 15m
      labels:
        severity: critical
    - alert: PrometheusMissingRuleEvaluations
      annotations:
        description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has missed {{ printf "%.0f" $value }} rule group evaluations in the last 5m.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusmissingruleevaluations
        summary: Prometheus is missing rule evaluations due to slow rule group evaluation.
      expr: increase(prometheus_rule_group_iterations_missed_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) > 0
      for: 15m
      labels:
        severity: warning
    - alert: PrometheusTargetLimitHit
      annotations:
        description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped {{ printf "%.0f" $value }} targets because the number of targets exceeded the configured target_limit.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustargetlimithit
        summary: Prometheus has dropped targets because some scrape configs have exceeded the targets limit.
      expr: increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) > 0
      for: 15m
      labels:
        severity: warning
    - alert: PrometheusLabelLimitHit
      annotations:
        description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped {{ printf "%.0f" $value }} targets because some samples exceeded the configured label_limit, label_name_length_limit or label_value_length_limit.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuslabellimithit
        summary: Prometheus has dropped targets because some scrape configs have exceeded the labels limit.
      expr: increase(prometheus_target_scrape_pool_exceeded_label_limits_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) > 0
      for: 15m
      labels:
        severity: warning
    - alert: PrometheusScrapeBodySizeLimitHit
      annotations:
        description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed {{ printf "%.0f" $value }} scrapes in the last 5m because some targets exceeded the configured body_size_limit.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusscrapebodysizelimithit
        summary: Prometheus has dropped some targets that exceeded body size limit.
      expr: increase(prometheus_target_scrapes_exceeded_body_size_limit_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) > 0
      for: 15m
      labels:
        severity: warning
    - alert: PrometheusScrapeSampleLimitHit
      annotations:
        description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed {{ printf "%.0f" $value }} scrapes in the last 5m because some targets exceeded the configured sample_limit.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusscrapesamplelimithit
        summary: Prometheus has failed scrapes that have exceeded the configured sample limit.
      expr: increase(prometheus_target_scrapes_exceeded_sample_limit_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) > 0
      for: 15m
      labels:
        severity: warning
    - alert: PrometheusTargetSyncFailure
      annotations:
        description: '{{ printf "%.0f" $value }} targets in Prometheus {{$labels.namespace}}/{{$labels.pod}} have failed to sync because invalid configuration was supplied.'
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustargetsyncfailure
        summary: Prometheus has failed to sync targets.
      expr: increase(prometheus_target_sync_failed_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[30m]) > 0
      for: 5m
      labels:
        severity: critical
    - alert: PrometheusHighQueryLoad
      annotations:
        description: Prometheus {{$labels.namespace}}/{{$labels.pod}} query API has less than 20% available capacity in its query engine for the last 15 minutes.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheushighqueryload
        summary: Prometheus is reaching its maximum capacity serving concurrent requests.
      expr: avg_over_time(prometheus_engine_queries{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) / max_over_time(prometheus_engine_queries_concurrent_max{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) > 0.8
      for: 15m
      labels:
        severity: warning
    - alert: PrometheusErrorSendingAlertsToAnyAlertmanager
      annotations:
        description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.'
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuserrorsendingalertstoanyalertmanager
        summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
      expr: |-
        min without (alertmanager) (
          rate(prometheus_notifications_errors_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor",alertmanager!~``}[5m])
        /
          rate(prometheus_notifications_sent_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor",alertmanager!~``}[5m])
        )
        * 100
        > 3
      for: 15m
      labels:
        severity: critical
@@ -1,29 +0,0 @@
# Source: kube-prometheus-stack/templates/exporters/core-dns/servicemonitor.yaml
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: prometheus-kube-prometheus-coredns
  namespace: vynil-monitor
  labels:
    app: kube-prometheus-stack-coredns
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/instance: prometheus
    app.kubernetes.io/version: "56.1.0"
    app.kubernetes.io/part-of: kube-prometheus-stack
    chart: kube-prometheus-stack-56.1.0
    release: "prometheus"
    heritage: "Helm"
spec:
  jobLabel: jobLabel
  selector:
    matchLabels:
      app: kube-prometheus-stack-coredns
      release: "prometheus"
  namespaceSelector:
    matchNames:
    - "kube-system"
  endpoints:
  - port: http-metrics
    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
@@ -1,29 +0,0 @@
# Source: kube-prometheus-stack/templates/exporters/kube-proxy/servicemonitor.yaml
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: prometheus-kube-prometheus-kube-proxy
  namespace: vynil-monitor
  labels:
    app: kube-prometheus-stack-kube-proxy
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/instance: prometheus
    app.kubernetes.io/version: "56.1.0"
    app.kubernetes.io/part-of: kube-prometheus-stack
    chart: kube-prometheus-stack-56.1.0
    release: "prometheus"
    heritage: "Helm"
spec:
  jobLabel: jobLabel
  selector:
    matchLabels:
      app: kube-prometheus-stack-kube-proxy
      release: "prometheus"
  namespaceSelector:
    matchNames:
    - "kube-system"
  endpoints:
  - port: http-metrics
    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
@@ -1,95 +0,0 @@
# Source: kube-prometheus-stack/templates/exporters/kubelet/servicemonitor.yaml
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: prometheus-kube-prometheus-kubelet
  namespace: vynil-monitor
  labels:
    app: kube-prometheus-stack-kubelet
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/instance: prometheus
    app.kubernetes.io/version: "56.1.0"
    app.kubernetes.io/part-of: kube-prometheus-stack
    chart: kube-prometheus-stack-56.1.0
    release: "prometheus"
    heritage: "Helm"
spec:
  attachMetadata:
    node: false
  endpoints:
  - port: https-metrics
    scheme: https
    tlsConfig:
      caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      insecureSkipVerify: true
    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
    honorLabels: true
    honorTimestamps: true
    relabelings:
    - action: replace
      sourceLabels:
      - __metrics_path__
      targetLabel: metrics_path
  - port: https-metrics
    scheme: https
    path: /metrics/cadvisor
    honorLabels: true
    honorTimestamps: true
    tlsConfig:
      caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      insecureSkipVerify: true
    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
    metricRelabelings:
    - action: drop
      regex: container_cpu_(cfs_throttled_seconds_total|load_average_10s|system_seconds_total|user_seconds_total)
      sourceLabels:
      - __name__
    - action: drop
      regex: container_fs_(io_current|io_time_seconds_total|io_time_weighted_seconds_total|reads_merged_total|sector_reads_total|sector_writes_total|writes_merged_total)
      sourceLabels:
      - __name__
    - action: drop
      regex: container_memory_(mapped_file|swap)
      sourceLabels:
      - __name__
    - action: drop
      regex: container_(file_descriptors|tasks_state|threads_max)
      sourceLabels:
      - __name__
    - action: drop
      regex: container_spec.*
      sourceLabels:
      - __name__
    - action: drop
      regex: .+;
      sourceLabels:
      - id
      - pod
    relabelings:
    - action: replace
      sourceLabels:
      - __metrics_path__
      targetLabel: metrics_path
  - port: https-metrics
    scheme: https
    path: /metrics/probes
    honorLabels: true
    honorTimestamps: true
    tlsConfig:
      caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      insecureSkipVerify: true
    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
    relabelings:
    - action: replace
      sourceLabels:
      - __metrics_path__
      targetLabel: metrics_path
  jobLabel: k8s-app
  namespaceSelector:
    matchNames:
    - kube-system
  selector:
    matchLabels:
      app.kubernetes.io/name: kubelet
      k8s-app: kubelet
@@ -1,32 +0,0 @@
# Source: kube-prometheus-stack/templates/prometheus/servicemonitor.yaml
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: prometheus-kube-prometheus-prometheus
  namespace: vynil-monitor
  labels:
    app: kube-prometheus-stack-prometheus
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/instance: prometheus
    app.kubernetes.io/version: "56.1.0"
    app.kubernetes.io/part-of: kube-prometheus-stack
    chart: kube-prometheus-stack-56.1.0
    release: "prometheus"
    heritage: "Helm"
spec:
  selector:
    matchLabels:
      app: kube-prometheus-stack-prometheus
      release: "prometheus"
      self-monitor: "true"
  namespaceSelector:
    matchNames:
    - "vynil-monitor"
  endpoints:
  - port: http-web
    path: "/metrics"
  - port: reloader-web
    scheme: http
    path: "/metrics"
@@ -1,75 +0,0 @@
locals {
  dns_name  = "${var.sub_domain}.${var.domain_name}"
  dns_names = [local.dns_name]
  app_name  = var.component == var.instance ? var.instance : format("%s-%s", var.component, var.instance)
  icon      = "favicon.ico"
  request_headers = {
    "Content-Type" = "application/json"
    Authorization  = "Bearer ${data.kubernetes_secret_v1.authentik.data["AUTHENTIK_BOOTSTRAP_TOKEN"]}"
  }
  service = {
    "name" = "prometheus-operated"
    "port" = {
      "number" = 9090
    }
  }
}

module "ingress" {
  source        = "git::https://git.solidite.fr/vynil/kydah-modules.git//ingress"
  component     = ""
  instance      = var.instance
  namespace     = var.namespace
  issuer        = var.issuer
  ingress_class = var.ingress_class
  labels        = local.common-labels
  dns_names     = local.dns_names
  middlewares   = [module.forward.middleware]
  services      = [local.service]
  providers = {
    kubectl = kubectl
  }
}

module "application" {
  source            = "git::https://git.solidite.fr/vynil/kydah-modules.git//application"
  component         = var.component
  instance          = var.instance
  app_group         = var.app_group
  dns_name          = local.dns_name
  icon              = local.icon
  protocol_provider = module.forward.provider-id
  providers = {
    authentik = authentik
  }
}

provider "restapi" {
  uri                  = "http://authentik.${var.domain}-auth.svc/api/v3/"
  headers              = local.request_headers
  create_method        = "PATCH"
  update_method        = "PATCH"
  destroy_method       = "PATCH"
  write_returns_object = true
  id_attribute         = "name"
}

module "forward" {
  source          = "git::https://git.solidite.fr/vynil/kydah-modules.git//forward"
  component       = var.component
  instance        = var.instance
  domain          = var.domain
  namespace       = var.namespace
  ingress_class   = var.ingress_class
  labels          = local.common-labels
  dns_names       = local.dns_names
  service         = local.service
  icon            = local.icon
  request_headers = local.request_headers
  providers = {
    restapi   = restapi
    http      = http
    kubectl   = kubectl
    authentik = authentik
  }
}
@@ -1,62 +0,0 @@
resource "kubectl_manifest" "prometheus" {
  yaml_body = <<-EOF
    apiVersion: monitoring.coreos.com/v1
    kind: Prometheus
    metadata:
      name: prometheus
      namespace: "${var.namespace}"
      labels: ${jsonencode(local.common-labels)}
    spec:
      image: "${var.images.prometheus.registry}/${var.images.prometheus.repository}:${var.images.prometheus.tag}"
      version: ${var.images.prometheus.tag}
      externalUrl: http://${var.component}-${var.instance}.${var.namespace}:9090
      replicas: ${var.replicas}
      shards: ${var.shards}
      logLevel: ${var.logLevel}
      listenLocal: ${var.listenLocal}
      enableAdminAPI: ${var.enableAdminAPI}
      retention: "${var.retention}"
      tsdb:
        outOfOrderTimeWindow: 0s
      walCompression: true
      routePrefix: "/"
      alerting:
        alertmanagers:
        - namespace: ${var.namespace}
          name: ${var.alertmanager}
          port: http-web
          pathPrefix: "/"
          apiVersion: v2
      scrapeConfigNamespaceSelector: {}
      scrapeConfigSelector:
        matchLabels:
          app.kubernetes.io/managed-by: vynil
      serviceMonitorNamespaceSelector: {}
      serviceMonitorSelector:
        matchLabels:
          app.kubernetes.io/managed-by: vynil
      podMonitorNamespaceSelector: {}
      podMonitorSelector:
        matchLabels:
          app.kubernetes.io/managed-by: vynil
      probeNamespaceSelector: {}
      probeSelector:
        matchLabels:
          app.kubernetes.io/managed-by: vynil
      ruleNamespaceSelector: {}
      ruleSelector:
        matchLabels:
          app.kubernetes.io/managed-by: vynil
      serviceAccountName: prometheus-kube-prometheus-prometheus
      securityContext:
        fsGroup: 2000
        runAsGroup: 2000
        runAsNonRoot: true
        runAsUser: 1000
        seccompProfile:
          type: RuntimeDefault
      portName: http-web
      hostNetwork: false
  EOF
}
@@ -1,23 +0,0 @@
# Source: kube-prometheus-stack/templates/prometheus/clusterrolebinding.yaml
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus-kube-prometheus-prometheus
  labels:
    app: kube-prometheus-stack-prometheus
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/instance: prometheus
    app.kubernetes.io/version: "56.1.0"
    app.kubernetes.io/part-of: kube-prometheus-stack
    chart: kube-prometheus-stack-56.1.0
    release: "prometheus"
    heritage: "Helm"
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus-kube-prometheus-prometheus
subjects:
- kind: ServiceAccount
  name: prometheus-kube-prometheus-prometheus
  namespace: vynil-monitor
@@ -1,33 +0,0 @@
# Source: kube-prometheus-stack/templates/prometheus/clusterrole.yaml
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus-kube-prometheus-prometheus
  labels:
    app: kube-prometheus-stack-prometheus
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/instance: prometheus
    app.kubernetes.io/version: "56.1.0"
    app.kubernetes.io/part-of: kube-prometheus-stack
    chart: kube-prometheus-stack-56.1.0
    release: "prometheus"
    heritage: "Helm"
rules:
# These permissions are not in the kube-prometheus repo;
# they're grabbed from https://github.com/prometheus/prometheus/blob/master/documentation/examples/rbac-setup.yml
- apiGroups: [""]
  resources:
  - nodes
  - nodes/metrics
  - services
  - endpoints
  - pods
  verbs: ["get", "list", "watch"]
- apiGroups:
  - "networking.k8s.io"
  resources:
  - ingresses
  verbs: ["get", "list", "watch"]
- nonResourceURLs: ["/metrics", "/metrics/cadvisor"]
  verbs: ["get"]
@@ -1,45 +0,0 @@

# first loop through resources in ids_prio[0]
resource "kustomization_resource" "pre_no_ns" {
  for_each = data.kustomization_overlay.data_no_ns.ids_prio[0]

  manifest = (
    contains(["_/Secret"], regex("(?P<group_kind>.*/.*)/.*/.*", each.value)["group_kind"])
    ? sensitive(data.kustomization_overlay.data_no_ns.manifests[each.value])
    : data.kustomization_overlay.data_no_ns.manifests[each.value]
  )
}

# then loop through resources in ids_prio[1],
# set an explicit depends_on on kustomization_resource.pre_no_ns,
# and wait up to 5 minutes for any deployment or daemonset to become ready
resource "kustomization_resource" "main_no_ns" {
  for_each = data.kustomization_overlay.data_no_ns.ids_prio[1]

  manifest = (
    contains(["_/Secret"], regex("(?P<group_kind>.*/.*)/.*/.*", each.value)["group_kind"])
    ? sensitive(data.kustomization_overlay.data_no_ns.manifests[each.value])
    : data.kustomization_overlay.data_no_ns.manifests[each.value]
  )
  wait = true
  timeouts {
    create = "5m"
    update = "5m"
  }

  depends_on = [kustomization_resource.pre_no_ns]
}

# finally, loop through resources in ids_prio[2]
# and set an explicit depends_on on kustomization_resource.main_no_ns
resource "kustomization_resource" "post_no_ns" {
  for_each = data.kustomization_overlay.data_no_ns.ids_prio[2]

  manifest = (
    contains(["_/Secret"], regex("(?P<group_kind>.*/.*)/.*/.*", each.value)["group_kind"])
    ? sensitive(data.kustomization_overlay.data_no_ns.manifests[each.value])
    : data.kustomization_overlay.data_no_ns.manifests[each.value]
  )

  depends_on = [kustomization_resource.main_no_ns]
}
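A worked example of the ID parsing used above. The kustomization provider emits IDs in the form "group/kind/namespace/name", with "_" standing in for the core API group or a cluster scope (stated here as an assumption, inferred from the "_/Secret" check; the secret name below is illustrative):

# > regex("(?P<group_kind>.*/.*)/.*/.*", "_/Secret/vynil-monitor/authentik")["group_kind"]
# "_/Secret"
#   -> matched by contains(["_/Secret"], ...), so the manifest is wrapped in sensitive()
# > regex("(?P<group_kind>.*/.*)/.*/.*", "rbac.authorization.k8s.io/ClusterRoleBinding/_/prometheus-kube-prometheus-prometheus")["group_kind"]
# "rbac.authorization.k8s.io/ClusterRoleBinding"
#   -> not a Secret, so the manifest is applied as-is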
@@ -1,33 +0,0 @@
locals {
  svc-label = merge(local.common-labels, {
    "app"          = "kube-prometheus-stack-prometheus"
    "release"      = "prometheus"
    "self-monitor" = "true"
  })
}

resource "kubectl_manifest" "svc" {
  yaml_body = <<-EOF
    apiVersion: v1
    kind: Service
    metadata:
      name: "${var.component}-${var.instance}"
      namespace: "${var.namespace}"
      labels: ${jsonencode(local.svc-label)}
    spec:
      ports:
      - name: http-web
        port: 9090
        targetPort: 9090
      - name: reloader-web
        appProtocol: http
        port: 8080
        targetPort: reloader-web
      publishNotReadyAddresses: false
      selector:
        app.kubernetes.io/name: prometheus
        operator.prometheus.io/name: ${kubectl_manifest.prometheus.name}
      sessionAffinity: None
      type: "ClusterIP"
  EOF
}
@@ -1,38 +0,0 @@
# Source: kube-prometheus-stack/templates/grafana/configmaps-datasources.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-kube-prometheus-grafana-datasource
  namespace: vynil-monitor
  labels:
    grafana_datasource: "1"
    app: kube-prometheus-stack-grafana
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/instance: prometheus
    app.kubernetes.io/version: "56.1.0"
    app.kubernetes.io/part-of: kube-prometheus-stack
    chart: kube-prometheus-stack-56.1.0
    release: "prometheus"
    heritage: "Helm"
data:
  datasource.yaml: |-
    apiVersion: 1
    datasources:
    - name: Prometheus
      type: prometheus
      uid: prometheus
      url: http://prometheus-kube-prometheus-prometheus.vynil-monitor:9090/
      access: proxy
      isDefault: true
      jsonData:
        httpMethod: POST
        timeInterval: 30s
    - name: Alertmanager
      type: alertmanager
      uid: alertmanager
      url: http://prometheus-kube-prometheus-alertmanager.vynil-monitor:9093/
      access: proxy
      jsonData:
        handleGrafanaManagedAlerts: false
        implementation: prometheus
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1,18 +0,0 @@
# Source: kube-prometheus-stack/templates/prometheus/secret.yaml
apiVersion: v1
kind: Secret
metadata:
  name: prometheus-kube-prometheus-prometheus
  namespace: vynil-monitor
  labels:
    app: kube-prometheus-stack-prometheus
    app.kubernetes.io/component: prometheus
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/instance: prometheus
    app.kubernetes.io/version: "56.1.0"
    app.kubernetes.io/part-of: kube-prometheus-stack
    chart: kube-prometheus-stack-56.1.0
    release: "prometheus"
    heritage: "Helm"
data:
@@ -1,19 +0,0 @@
---
# Source: kube-prometheus-stack/templates/prometheus/serviceaccount.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus-kube-prometheus-prometheus
  namespace: vynil-monitor
  labels:
    app: kube-prometheus-stack-prometheus
    app.kubernetes.io/name: kube-prometheus-stack-prometheus
    app.kubernetes.io/component: prometheus
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/instance: prometheus
    app.kubernetes.io/version: "56.1.0"
    app.kubernetes.io/part-of: kube-prometheus-stack
    chart: kube-prometheus-stack-56.1.0
    release: "prometheus"
    heritage: "Helm"
@@ -1,26 +0,0 @@
# Source: kube-prometheus-stack/templates/exporters/core-dns/service.yaml
apiVersion: v1
kind: Service
metadata:
  name: prometheus-kube-prometheus-coredns
  labels:
    app: kube-prometheus-stack-coredns
    jobLabel: coredns
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/instance: prometheus
    app.kubernetes.io/version: "56.1.0"
    app.kubernetes.io/part-of: kube-prometheus-stack
    chart: kube-prometheus-stack-56.1.0
    release: "prometheus"
    heritage: "Helm"
  namespace: kube-system
spec:
  clusterIP: None
  ports:
  - name: http-metrics
    port: 9153
    protocol: TCP
    targetPort: 9153
  selector:
    k8s-app: kube-dns
@@ -1,27 +0,0 @@
# Source: kube-prometheus-stack/templates/exporters/kube-proxy/service.yaml
apiVersion: v1
kind: Service
metadata:
  name: prometheus-kube-prometheus-kube-proxy
  labels:
    app: kube-prometheus-stack-kube-proxy
    jobLabel: kube-proxy
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/instance: prometheus
    app.kubernetes.io/version: "56.1.0"
    app.kubernetes.io/part-of: kube-prometheus-stack
    chart: kube-prometheus-stack-56.1.0
    release: "prometheus"
    heritage: "Helm"
  namespace: kube-system
spec:
  clusterIP: None
  ports:
  - name: http-metrics
    port: 10249
    protocol: TCP
    targetPort: 10249
  selector:
    k8s-app: kube-proxy
  type: ClusterIP