fix
This commit is contained in:
@@ -28,17 +28,17 @@ data "kustomization_overlay" "data" {
|
|||||||
patches {
|
patches {
|
||||||
target {
|
target {
|
||||||
kind = "Alertmanager"
|
kind = "Alertmanager"
|
||||||
name = "prometheus-community-kube-alertmanager"
|
name = "alertmanager-kube-promethe-alertmanager"
|
||||||
}
|
}
|
||||||
patch = <<-EOF
|
patch = <<-EOF
|
||||||
apiVersion: monitoring.coreos.com/v1
|
apiVersion: monitoring.coreos.com/v1
|
||||||
kind: Alertmanager
|
kind: Alertmanager
|
||||||
metadata:
|
metadata:
|
||||||
name: prometheus-community-kube-alertmanager
|
name: alertmanager-kube-promethe-alertmanager
|
||||||
spec:
|
spec:
|
||||||
image: "${var.images.alertmanager.registry}/${var.images.alertmanager.repository}:${var.images.alertmanager.tag}"
|
image: "${var.images.alertmanager.registry}/${var.images.alertmanager.repository}:${var.images.alertmanager.tag}"
|
||||||
version: ${var.images.alertmanager.tag}
|
version: ${var.images.alertmanager.tag}
|
||||||
externalUrl: http://alertmanager-operated.${var.namespace}:9093
|
externalUrl: http://${var.component}-${var.instance}.${var.namespace}:9093
|
||||||
replicas: ${var.replicas}
|
replicas: ${var.replicas}
|
||||||
listenLocal: ${var.listenLocal}
|
listenLocal: ${var.listenLocal}
|
||||||
logLevel: "${var.logLevel}"
|
logLevel: "${var.logLevel}"
|
||||||
@@ -72,7 +72,7 @@ data "kustomization_overlay" "data" {
|
|||||||
patches {
|
patches {
|
||||||
target {
|
target {
|
||||||
kind = "ServiceMonitor"
|
kind = "ServiceMonitor"
|
||||||
name = "prometheus-community-kube-alertmanager"
|
name = "alertmanager-kube-promethe-alertmanager"
|
||||||
}
|
}
|
||||||
patch = <<-EOF
|
patch = <<-EOF
|
||||||
- op: replace
|
- op: replace
|
||||||
@@ -83,13 +83,13 @@ data "kustomization_overlay" "data" {
|
|||||||
patches {
|
patches {
|
||||||
target {
|
target {
|
||||||
kind = "PrometheusRule"
|
kind = "PrometheusRule"
|
||||||
name = "prometheus-community-kube-alertmanager.rules"
|
name = "alertmanager-kube-promethe-alertmanager.rules"
|
||||||
}
|
}
|
||||||
patch = <<-EOF
|
patch = <<-EOF
|
||||||
apiVersion: monitoring.coreos.com/v1
|
apiVersion: monitoring.coreos.com/v1
|
||||||
kind: PrometheusRule
|
kind: PrometheusRule
|
||||||
metadata:
|
metadata:
|
||||||
name: prometheus-community-kube-alertmanager.rules
|
name: alertmanager-kube-promethe-alertmanager.rules
|
||||||
spec:
|
spec:
|
||||||
groups:
|
groups:
|
||||||
- name: alertmanager.rules
|
- name: alertmanager.rules
|
||||||
@@ -98,53 +98,53 @@ spec:
|
|||||||
expr: |-
|
expr: |-
|
||||||
# Without max_over_time, failed scrapes could create false negatives, see
|
# Without max_over_time, failed scrapes could create false negatives, see
|
||||||
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||||
max_over_time(alertmanager_config_last_reload_successful{job="prometheus-community-kube-alertmanager",namespace="${var.namespace}"}[5m]) == 0
|
max_over_time(alertmanager_config_last_reload_successful{job="alertmanager-kube-promethe-alertmanager",namespace="${var.namespace}"}[5m]) == 0
|
||||||
- alert: AlertmanagerMembersInconsistent
|
- alert: AlertmanagerMembersInconsistent
|
||||||
expr: |-
|
expr: |-
|
||||||
# Without max_over_time, failed scrapes could create false negatives, see
|
# Without max_over_time, failed scrapes could create false negatives, see
|
||||||
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||||
max_over_time(alertmanager_cluster_members{job="prometheus-community-kube-alertmanager",namespace="${var.namespace}"}[5m])
|
max_over_time(alertmanager_cluster_members{job="alertmanager-kube-promethe-alertmanager",namespace="${var.namespace}"}[5m])
|
||||||
< on (namespace,service,cluster) group_left
|
< on (namespace,service,cluster) group_left
|
||||||
count by (namespace,service,cluster) (max_over_time(alertmanager_cluster_members{job="prometheus-community-kube-alertmanager",namespace="${var.namespace}"}[5m]))
|
count by (namespace,service,cluster) (max_over_time(alertmanager_cluster_members{job="alertmanager-kube-promethe-alertmanager",namespace="${var.namespace}"}[5m]))
|
||||||
- alert: AlertmanagerFailedToSendAlerts
|
- alert: AlertmanagerFailedToSendAlerts
|
||||||
expr: |-
|
expr: |-
|
||||||
(
|
(
|
||||||
rate(alertmanager_notifications_failed_total{job="prometheus-community-kube-alertmanager",namespace="${var.namespace}"}[5m])
|
rate(alertmanager_notifications_failed_total{job="alertmanager-kube-promethe-alertmanager",namespace="${var.namespace}"}[5m])
|
||||||
/
|
/
|
||||||
ignoring (reason) group_left rate(alertmanager_notifications_total{job="prometheus-community-kube-alertmanager",namespace="${var.namespace}"}[5m])
|
ignoring (reason) group_left rate(alertmanager_notifications_total{job="alertmanager-kube-promethe-alertmanager",namespace="${var.namespace}"}[5m])
|
||||||
)
|
)
|
||||||
> 0.01
|
> 0.01
|
||||||
- alert: AlertmanagerClusterFailedToSendAlerts
|
- alert: AlertmanagerClusterFailedToSendAlerts
|
||||||
expr: |-
|
expr: |-
|
||||||
min by (namespace,service, integration) (
|
min by (namespace,service, integration) (
|
||||||
rate(alertmanager_notifications_failed_total{job="prometheus-community-kube-alertmanager",namespace="${var.namespace}", integration=~`.*`}[5m])
|
rate(alertmanager_notifications_failed_total{job="alertmanager-kube-promethe-alertmanager",namespace="${var.namespace}", integration=~`.*`}[5m])
|
||||||
/
|
/
|
||||||
ignoring (reason) group_left rate(alertmanager_notifications_total{job="prometheus-community-kube-alertmanager",namespace="${var.namespace}", integration=~`.*`}[5m])
|
ignoring (reason) group_left rate(alertmanager_notifications_total{job="alertmanager-kube-promethe-alertmanager",namespace="${var.namespace}", integration=~`.*`}[5m])
|
||||||
)
|
)
|
||||||
> 0.01
|
> 0.01
|
||||||
- alert: AlertmanagerClusterFailedToSendAlerts
|
- alert: AlertmanagerClusterFailedToSendAlerts
|
||||||
expr: |-
|
expr: |-
|
||||||
min by (namespace,service, integration) (
|
min by (namespace,service, integration) (
|
||||||
rate(alertmanager_notifications_failed_total{job="prometheus-community-kube-alertmanager",namespace="${var.namespace}", integration!~`.*`}[5m])
|
rate(alertmanager_notifications_failed_total{job="alertmanager-kube-promethe-alertmanager",namespace="${var.namespace}", integration!~`.*`}[5m])
|
||||||
/
|
/
|
||||||
ignoring (reason) group_left rate(alertmanager_notifications_total{job="prometheus-community-kube-alertmanager",namespace="${var.namespace}", integration!~`.*`}[5m])
|
ignoring (reason) group_left rate(alertmanager_notifications_total{job="alertmanager-kube-promethe-alertmanager",namespace="${var.namespace}", integration!~`.*`}[5m])
|
||||||
)
|
)
|
||||||
> 0.01
|
> 0.01
|
||||||
- alert: AlertmanagerConfigInconsistent
|
- alert: AlertmanagerConfigInconsistent
|
||||||
expr: |-
|
expr: |-
|
||||||
count by (namespace,service,cluster) (
|
count by (namespace,service,cluster) (
|
||||||
count_values by (namespace,service,cluster) ("config_hash", alertmanager_config_hash{job="prometheus-community-kube-alertmanager",namespace="${var.namespace}"})
|
count_values by (namespace,service,cluster) ("config_hash", alertmanager_config_hash{job="alertmanager-kube-promethe-alertmanager",namespace="${var.namespace}"})
|
||||||
)
|
)
|
||||||
!= 1
|
!= 1
|
||||||
- alert: AlertmanagerClusterDown
|
- alert: AlertmanagerClusterDown
|
||||||
expr: |-
|
expr: |-
|
||||||
(
|
(
|
||||||
count by (namespace,service,cluster) (
|
count by (namespace,service,cluster) (
|
||||||
avg_over_time(up{job="prometheus-community-kube-alertmanager",namespace="${var.namespace}"}[5m]) < 0.5
|
avg_over_time(up{job="alertmanager-kube-promethe-alertmanager",namespace="${var.namespace}"}[5m]) < 0.5
|
||||||
)
|
)
|
||||||
/
|
/
|
||||||
count by (namespace,service,cluster) (
|
count by (namespace,service,cluster) (
|
||||||
up{job="prometheus-community-kube-alertmanager",namespace="${var.namespace}"}
|
up{job="alertmanager-kube-promethe-alertmanager",namespace="${var.namespace}"}
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
>= 0.5
|
>= 0.5
|
||||||
@@ -152,11 +152,11 @@ spec:
|
|||||||
expr: |-
|
expr: |-
|
||||||
(
|
(
|
||||||
count by (namespace,service,cluster) (
|
count by (namespace,service,cluster) (
|
||||||
changes(process_start_time_seconds{job="prometheus-community-kube-alertmanager",namespace="${var.namespace}"}[10m]) > 4
|
changes(process_start_time_seconds{job="alertmanager-kube-promethe-alertmanager",namespace="${var.namespace}"}[10m]) > 4
|
||||||
)
|
)
|
||||||
/
|
/
|
||||||
count by (namespace,service,cluster) (
|
count by (namespace,service,cluster) (
|
||||||
up{job="prometheus-community-kube-alertmanager",namespace="${var.namespace}"}
|
up{job="alertmanager-kube-promethe-alertmanager",namespace="${var.namespace}"}
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
>= 0.5
|
>= 0.5
|
||||||
|
|||||||
@@ -6,45 +6,30 @@ metadata:
|
|||||||
name: alertmanager
|
name: alertmanager
|
||||||
description: null
|
description: null
|
||||||
options:
|
options:
|
||||||
|
replicas:
|
||||||
|
default: 1
|
||||||
|
examples:
|
||||||
|
- 1
|
||||||
|
type: integer
|
||||||
domain:
|
domain:
|
||||||
default: your-company
|
default: your-company
|
||||||
examples:
|
examples:
|
||||||
- your-company
|
- your-company
|
||||||
type: string
|
type: string
|
||||||
app-group:
|
|
||||||
default: monitor
|
|
||||||
examples:
|
|
||||||
- monitor
|
|
||||||
type: string
|
|
||||||
issuer:
|
issuer:
|
||||||
default: letsencrypt-prod
|
default: letsencrypt-prod
|
||||||
examples:
|
examples:
|
||||||
- letsencrypt-prod
|
- letsencrypt-prod
|
||||||
type: string
|
type: string
|
||||||
sub-domain:
|
|
||||||
default: alertmanager
|
|
||||||
examples:
|
|
||||||
- alertmanager
|
|
||||||
type: string
|
|
||||||
retention:
|
|
||||||
default: 120h
|
|
||||||
examples:
|
|
||||||
- 120h
|
|
||||||
type: string
|
|
||||||
ingress-class:
|
|
||||||
default: traefik
|
|
||||||
examples:
|
|
||||||
- traefik
|
|
||||||
type: string
|
|
||||||
listenLocal:
|
listenLocal:
|
||||||
default: false
|
default: false
|
||||||
examples:
|
examples:
|
||||||
- false
|
- false
|
||||||
type: boolean
|
type: boolean
|
||||||
domain-name:
|
sub-domain:
|
||||||
default: your_company.com
|
default: alertmanager
|
||||||
examples:
|
examples:
|
||||||
- your_company.com
|
- alertmanager
|
||||||
type: string
|
type: string
|
||||||
images:
|
images:
|
||||||
default:
|
default:
|
||||||
@@ -85,16 +70,31 @@ options:
|
|||||||
type: string
|
type: string
|
||||||
type: object
|
type: object
|
||||||
type: object
|
type: object
|
||||||
replicas:
|
retention:
|
||||||
default: 1
|
default: 120h
|
||||||
examples:
|
examples:
|
||||||
- 1
|
- 120h
|
||||||
type: integer
|
type: string
|
||||||
|
domain-name:
|
||||||
|
default: your_company.com
|
||||||
|
examples:
|
||||||
|
- your_company.com
|
||||||
|
type: string
|
||||||
|
app-group:
|
||||||
|
default: monitor
|
||||||
|
examples:
|
||||||
|
- monitor
|
||||||
|
type: string
|
||||||
logLevel:
|
logLevel:
|
||||||
default: info
|
default: info
|
||||||
examples:
|
examples:
|
||||||
- info
|
- info
|
||||||
type: string
|
type: string
|
||||||
|
ingress-class:
|
||||||
|
default: traefik
|
||||||
|
examples:
|
||||||
|
- traefik
|
||||||
|
type: string
|
||||||
dependencies:
|
dependencies:
|
||||||
- dist: null
|
- dist: null
|
||||||
category: share
|
category: share
|
||||||
|
|||||||
@@ -2,25 +2,25 @@
|
|||||||
apiVersion: monitoring.coreos.com/v1
|
apiVersion: monitoring.coreos.com/v1
|
||||||
kind: Alertmanager
|
kind: Alertmanager
|
||||||
metadata:
|
metadata:
|
||||||
name: prometheus-community-kube-alertmanager
|
name: alertmanager-kube-promethe-alertmanager
|
||||||
namespace: vynil-monitor
|
namespace: vynil-monitor
|
||||||
labels:
|
labels:
|
||||||
app: kube-prometheus-stack-alertmanager
|
app: kube-prometheus-stack-alertmanager
|
||||||
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
app.kubernetes.io/managed-by: Helm
|
||||||
app.kubernetes.io/instance: prometheus-community
|
app.kubernetes.io/instance: alertmanager
|
||||||
app.kubernetes.io/version: "56.0.3"
|
app.kubernetes.io/version: "56.0.3"
|
||||||
app.kubernetes.io/part-of: kube-prometheus-stack
|
app.kubernetes.io/part-of: kube-prometheus-stack
|
||||||
chart: kube-prometheus-stack-56.0.3
|
chart: kube-prometheus-stack-56.0.3
|
||||||
release: "prometheus-community"
|
release: "alertmanager"
|
||||||
heritage: "Helm"
|
heritage: "Helm"
|
||||||
spec:
|
spec:
|
||||||
image: "quay.io/prometheus/alertmanager:v0.26.0"
|
image: "quay.io/prometheus/alertmanager:v0.26.0"
|
||||||
version: v0.26.0
|
version: v0.26.0
|
||||||
replicas: 1
|
replicas: 1
|
||||||
listenLocal: false
|
listenLocal: false
|
||||||
serviceAccountName: prometheus-community-kube-alertmanager
|
serviceAccountName: alertmanager-kube-promethe-alertmanager
|
||||||
externalUrl: http://prometheus-community-kube-alertmanager.vynil-monitor:9093
|
externalUrl: http://alertmanager-kube-promethe-alertmanager.vynil-monitor:9093
|
||||||
paused: false
|
paused: false
|
||||||
logFormat: "logfmt"
|
logFormat: "logfmt"
|
||||||
logLevel: "info"
|
logLevel: "info"
|
||||||
@@ -2,17 +2,17 @@
|
|||||||
apiVersion: monitoring.coreos.com/v1
|
apiVersion: monitoring.coreos.com/v1
|
||||||
kind: PrometheusRule
|
kind: PrometheusRule
|
||||||
metadata:
|
metadata:
|
||||||
name: prometheus-community-kube-alertmanager.rules
|
name: alertmanager-kube-promethe-alertmanager.rules
|
||||||
namespace: vynil-monitor
|
namespace: vynil-monitor
|
||||||
labels:
|
labels:
|
||||||
app: kube-prometheus-stack
|
app: kube-prometheus-stack
|
||||||
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
app.kubernetes.io/managed-by: Helm
|
||||||
app.kubernetes.io/instance: prometheus-community
|
app.kubernetes.io/instance: alertmanager
|
||||||
app.kubernetes.io/version: "56.0.3"
|
app.kubernetes.io/version: "56.0.3"
|
||||||
app.kubernetes.io/part-of: kube-prometheus-stack
|
app.kubernetes.io/part-of: kube-prometheus-stack
|
||||||
chart: kube-prometheus-stack-56.0.3
|
chart: kube-prometheus-stack-56.0.3
|
||||||
release: "prometheus-community"
|
release: "alertmanager"
|
||||||
heritage: "Helm"
|
heritage: "Helm"
|
||||||
spec:
|
spec:
|
||||||
groups:
|
groups:
|
||||||
@@ -26,7 +26,7 @@ spec:
|
|||||||
expr: |-
|
expr: |-
|
||||||
# Without max_over_time, failed scrapes could create false negatives, see
|
# Without max_over_time, failed scrapes could create false negatives, see
|
||||||
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||||
max_over_time(alertmanager_config_last_reload_successful{job="prometheus-community-kube-alertmanager",namespace="vynil-monitor"}[5m]) == 0
|
max_over_time(alertmanager_config_last_reload_successful{job="alertmanager-kube-promethe-alertmanager",namespace="vynil-monitor"}[5m]) == 0
|
||||||
for: 10m
|
for: 10m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: critical
|
||||||
@@ -38,9 +38,9 @@ spec:
|
|||||||
expr: |-
|
expr: |-
|
||||||
# Without max_over_time, failed scrapes could create false negatives, see
|
# Without max_over_time, failed scrapes could create false negatives, see
|
||||||
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||||
max_over_time(alertmanager_cluster_members{job="prometheus-community-kube-alertmanager",namespace="vynil-monitor"}[5m])
|
max_over_time(alertmanager_cluster_members{job="alertmanager-kube-promethe-alertmanager",namespace="vynil-monitor"}[5m])
|
||||||
< on (namespace,service,cluster) group_left
|
< on (namespace,service,cluster) group_left
|
||||||
count by (namespace,service,cluster) (max_over_time(alertmanager_cluster_members{job="prometheus-community-kube-alertmanager",namespace="vynil-monitor"}[5m]))
|
count by (namespace,service,cluster) (max_over_time(alertmanager_cluster_members{job="alertmanager-kube-promethe-alertmanager",namespace="vynil-monitor"}[5m]))
|
||||||
for: 15m
|
for: 15m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: critical
|
||||||
@@ -51,9 +51,9 @@ spec:
|
|||||||
summary: An Alertmanager instance failed to send notifications.
|
summary: An Alertmanager instance failed to send notifications.
|
||||||
expr: |-
|
expr: |-
|
||||||
(
|
(
|
||||||
rate(alertmanager_notifications_failed_total{job="prometheus-community-kube-alertmanager",namespace="vynil-monitor"}[5m])
|
rate(alertmanager_notifications_failed_total{job="alertmanager-kube-promethe-alertmanager",namespace="vynil-monitor"}[5m])
|
||||||
/
|
/
|
||||||
ignoring (reason) group_left rate(alertmanager_notifications_total{job="prometheus-community-kube-alertmanager",namespace="vynil-monitor"}[5m])
|
ignoring (reason) group_left rate(alertmanager_notifications_total{job="alertmanager-kube-promethe-alertmanager",namespace="vynil-monitor"}[5m])
|
||||||
)
|
)
|
||||||
> 0.01
|
> 0.01
|
||||||
for: 5m
|
for: 5m
|
||||||
@@ -66,9 +66,9 @@ spec:
|
|||||||
summary: All Alertmanager instances in a cluster failed to send notifications to a critical integration.
|
summary: All Alertmanager instances in a cluster failed to send notifications to a critical integration.
|
||||||
expr: |-
|
expr: |-
|
||||||
min by (namespace,service, integration) (
|
min by (namespace,service, integration) (
|
||||||
rate(alertmanager_notifications_failed_total{job="prometheus-community-kube-alertmanager",namespace="vynil-monitor", integration=~`.*`}[5m])
|
rate(alertmanager_notifications_failed_total{job="alertmanager-kube-promethe-alertmanager",namespace="vynil-monitor", integration=~`.*`}[5m])
|
||||||
/
|
/
|
||||||
ignoring (reason) group_left rate(alertmanager_notifications_total{job="prometheus-community-kube-alertmanager",namespace="vynil-monitor", integration=~`.*`}[5m])
|
ignoring (reason) group_left rate(alertmanager_notifications_total{job="alertmanager-kube-promethe-alertmanager",namespace="vynil-monitor", integration=~`.*`}[5m])
|
||||||
)
|
)
|
||||||
> 0.01
|
> 0.01
|
||||||
for: 5m
|
for: 5m
|
||||||
@@ -81,9 +81,9 @@ spec:
|
|||||||
summary: All Alertmanager instances in a cluster failed to send notifications to a non-critical integration.
|
summary: All Alertmanager instances in a cluster failed to send notifications to a non-critical integration.
|
||||||
expr: |-
|
expr: |-
|
||||||
min by (namespace,service, integration) (
|
min by (namespace,service, integration) (
|
||||||
rate(alertmanager_notifications_failed_total{job="prometheus-community-kube-alertmanager",namespace="vynil-monitor", integration!~`.*`}[5m])
|
rate(alertmanager_notifications_failed_total{job="alertmanager-kube-promethe-alertmanager",namespace="vynil-monitor", integration!~`.*`}[5m])
|
||||||
/
|
/
|
||||||
ignoring (reason) group_left rate(alertmanager_notifications_total{job="prometheus-community-kube-alertmanager",namespace="vynil-monitor", integration!~`.*`}[5m])
|
ignoring (reason) group_left rate(alertmanager_notifications_total{job="alertmanager-kube-promethe-alertmanager",namespace="vynil-monitor", integration!~`.*`}[5m])
|
||||||
)
|
)
|
||||||
> 0.01
|
> 0.01
|
||||||
for: 5m
|
for: 5m
|
||||||
@@ -96,7 +96,7 @@ spec:
|
|||||||
summary: Alertmanager instances within the same cluster have different configurations.
|
summary: Alertmanager instances within the same cluster have different configurations.
|
||||||
expr: |-
|
expr: |-
|
||||||
count by (namespace,service,cluster) (
|
count by (namespace,service,cluster) (
|
||||||
count_values by (namespace,service,cluster) ("config_hash", alertmanager_config_hash{job="prometheus-community-kube-alertmanager",namespace="vynil-monitor"})
|
count_values by (namespace,service,cluster) ("config_hash", alertmanager_config_hash{job="alertmanager-kube-promethe-alertmanager",namespace="vynil-monitor"})
|
||||||
)
|
)
|
||||||
!= 1
|
!= 1
|
||||||
for: 20m
|
for: 20m
|
||||||
@@ -110,11 +110,11 @@ spec:
|
|||||||
expr: |-
|
expr: |-
|
||||||
(
|
(
|
||||||
count by (namespace,service,cluster) (
|
count by (namespace,service,cluster) (
|
||||||
avg_over_time(up{job="prometheus-community-kube-alertmanager",namespace="vynil-monitor"}[5m]) < 0.5
|
avg_over_time(up{job="alertmanager-kube-promethe-alertmanager",namespace="vynil-monitor"}[5m]) < 0.5
|
||||||
)
|
)
|
||||||
/
|
/
|
||||||
count by (namespace,service,cluster) (
|
count by (namespace,service,cluster) (
|
||||||
up{job="prometheus-community-kube-alertmanager",namespace="vynil-monitor"}
|
up{job="alertmanager-kube-promethe-alertmanager",namespace="vynil-monitor"}
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
>= 0.5
|
>= 0.5
|
||||||
@@ -129,11 +129,11 @@ spec:
|
|||||||
expr: |-
|
expr: |-
|
||||||
(
|
(
|
||||||
count by (namespace,service,cluster) (
|
count by (namespace,service,cluster) (
|
||||||
changes(process_start_time_seconds{job="prometheus-community-kube-alertmanager",namespace="vynil-monitor"}[10m]) > 4
|
changes(process_start_time_seconds{job="alertmanager-kube-promethe-alertmanager",namespace="vynil-monitor"}[10m]) > 4
|
||||||
)
|
)
|
||||||
/
|
/
|
||||||
count by (namespace,service,cluster) (
|
count by (namespace,service,cluster) (
|
||||||
up{job="prometheus-community-kube-alertmanager",namespace="vynil-monitor"}
|
up{job="alertmanager-kube-promethe-alertmanager",namespace="vynil-monitor"}
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
>= 0.5
|
>= 0.5
|
||||||
@@ -2,24 +2,24 @@
|
|||||||
apiVersion: monitoring.coreos.com/v1
|
apiVersion: monitoring.coreos.com/v1
|
||||||
kind: ServiceMonitor
|
kind: ServiceMonitor
|
||||||
metadata:
|
metadata:
|
||||||
name: prometheus-community-kube-alertmanager
|
name: alertmanager-kube-promethe-alertmanager
|
||||||
namespace: vynil-monitor
|
namespace: vynil-monitor
|
||||||
labels:
|
labels:
|
||||||
app: kube-prometheus-stack-alertmanager
|
app: kube-prometheus-stack-alertmanager
|
||||||
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
app.kubernetes.io/managed-by: Helm
|
||||||
app.kubernetes.io/instance: prometheus-community
|
app.kubernetes.io/instance: alertmanager
|
||||||
app.kubernetes.io/version: "56.0.3"
|
app.kubernetes.io/version: "56.0.3"
|
||||||
app.kubernetes.io/part-of: kube-prometheus-stack
|
app.kubernetes.io/part-of: kube-prometheus-stack
|
||||||
chart: kube-prometheus-stack-56.0.3
|
chart: kube-prometheus-stack-56.0.3
|
||||||
release: "prometheus-community"
|
release: "alertmanager"
|
||||||
heritage: "Helm"
|
heritage: "Helm"
|
||||||
spec:
|
spec:
|
||||||
|
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
app: kube-prometheus-stack-alertmanager
|
app: kube-prometheus-stack-alertmanager
|
||||||
release: "prometheus-community"
|
release: "alertmanager"
|
||||||
self-monitor: "true"
|
self-monitor: "true"
|
||||||
namespaceSelector:
|
namespaceSelector:
|
||||||
matchNames:
|
matchNames:
|
||||||
33
monitor/alertmanager/svc.tf
Normal file
33
monitor/alertmanager/svc.tf
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
# Labels for the Alertmanager Service declared below.
#
# The "app", "release" and "self-monitor" values mirror the labels carried by
# the chart-rendered Alertmanager Service and used by the Alertmanager
# ServiceMonitor's matchLabels selector. The previous values
# ("kube-prometheus-stack-prometheus" / "prometheus") belonged to the
# Prometheus component.
# NOTE(review): confirm the ServiceMonitor selector is not rewritten to
# different values by the kustomization overlay patch.
locals {
  svc-label = merge(local.common-labels, {
    "app"          = "kube-prometheus-stack-alertmanager"
    "release"      = "alertmanager"
    "self-monitor" = "true"
  })
}

# ClusterIP Service named "<component>-<instance>" in the module namespace.
# It exposes the Alertmanager web port (9093, "http-web") and the config
# reloader port (8080, "reloader-web"), and selects the pods labelled for the
# "alertmanager-kube-promethe-alertmanager" Alertmanager object.
resource "kubectl_manifest" "svc" {
  yaml_body = <<-EOF
    apiVersion: v1
    kind: Service
    metadata:
      name: "${var.component}-${var.instance}"
      namespace: "${var.namespace}"
      labels: ${jsonencode(local.svc-label)}
    spec:
      ports:
      - name: http-web
        port: 9093
        targetPort: 9093
        protocol: TCP
      - name: reloader-web
        appProtocol: http
        port: 8080
        targetPort: reloader-web
      selector:
        app.kubernetes.io/name: alertmanager
        alertmanager: alertmanager-kube-promethe-alertmanager
      sessionAffinity: None
      type: "ClusterIP"
  EOF
}
|
||||||
@@ -3,7 +3,7 @@ apiVersion: v1
|
|||||||
kind: ConfigMap
|
kind: ConfigMap
|
||||||
metadata:
|
metadata:
|
||||||
namespace: vynil-monitor
|
namespace: vynil-monitor
|
||||||
name: prometheus-community-kube-alertmanager-overview
|
name: alertmanager-kube-promethe-alertmanager-overview
|
||||||
annotations:
|
annotations:
|
||||||
{}
|
{}
|
||||||
labels:
|
labels:
|
||||||
@@ -11,11 +11,11 @@ metadata:
|
|||||||
app: kube-prometheus-stack-grafana
|
app: kube-prometheus-stack-grafana
|
||||||
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
app.kubernetes.io/managed-by: Helm
|
||||||
app.kubernetes.io/instance: prometheus-community
|
app.kubernetes.io/instance: alertmanager
|
||||||
app.kubernetes.io/version: "56.0.3"
|
app.kubernetes.io/version: "56.0.3"
|
||||||
app.kubernetes.io/part-of: kube-prometheus-stack
|
app.kubernetes.io/part-of: kube-prometheus-stack
|
||||||
chart: kube-prometheus-stack-56.0.3
|
chart: kube-prometheus-stack-56.0.3
|
||||||
release: "prometheus-community"
|
release: "alertmanager"
|
||||||
heritage: "Helm"
|
heritage: "Helm"
|
||||||
data:
|
data:
|
||||||
alertmanager-overview.json: |-
|
alertmanager-overview.json: |-
|
||||||
@@ -2,17 +2,17 @@
|
|||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: Secret
|
kind: Secret
|
||||||
metadata:
|
metadata:
|
||||||
name: alertmanager-prometheus-community-kube-alertmanager
|
name: alertmanager-alertmanager-kube-promethe-alertmanager
|
||||||
namespace: vynil-monitor
|
namespace: vynil-monitor
|
||||||
labels:
|
labels:
|
||||||
app: kube-prometheus-stack-alertmanager
|
app: kube-prometheus-stack-alertmanager
|
||||||
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
app.kubernetes.io/managed-by: Helm
|
||||||
app.kubernetes.io/instance: prometheus-community
|
app.kubernetes.io/instance: alertmanager
|
||||||
app.kubernetes.io/version: "56.0.3"
|
app.kubernetes.io/version: "56.0.3"
|
||||||
app.kubernetes.io/part-of: kube-prometheus-stack
|
app.kubernetes.io/part-of: kube-prometheus-stack
|
||||||
chart: kube-prometheus-stack-56.0.3
|
chart: kube-prometheus-stack-56.0.3
|
||||||
release: "prometheus-community"
|
release: "alertmanager"
|
||||||
heritage: "Helm"
|
heritage: "Helm"
|
||||||
data:
|
data:
|
||||||
alertmanager.yaml: "Z2xvYmFsOgogIHJlc29sdmVfdGltZW91dDogNW0KaW5oaWJpdF9ydWxlczoKLSBlcXVhbDoKICAtIG5hbWVzcGFjZQogIC0gYWxlcnRuYW1lCiAgc291cmNlX21hdGNoZXJzOgogIC0gc2V2ZXJpdHkgPSBjcml0aWNhbAogIHRhcmdldF9tYXRjaGVyczoKICAtIHNldmVyaXR5ID1+IHdhcm5pbmd8aW5mbwotIGVxdWFsOgogIC0gbmFtZXNwYWNlCiAgLSBhbGVydG5hbWUKICBzb3VyY2VfbWF0Y2hlcnM6CiAgLSBzZXZlcml0eSA9IHdhcm5pbmcKICB0YXJnZXRfbWF0Y2hlcnM6CiAgLSBzZXZlcml0eSA9IGluZm8KLSBlcXVhbDoKICAtIG5hbWVzcGFjZQogIHNvdXJjZV9tYXRjaGVyczoKICAtIGFsZXJ0bmFtZSA9IEluZm9JbmhpYml0b3IKICB0YXJnZXRfbWF0Y2hlcnM6CiAgLSBzZXZlcml0eSA9IGluZm8KLSB0YXJnZXRfbWF0Y2hlcnM6CiAgLSBhbGVydG5hbWUgPSBJbmZvSW5oaWJpdG9yCnJlY2VpdmVyczoKLSBuYW1lOiAibnVsbCIKcm91dGU6CiAgZ3JvdXBfYnk6CiAgLSBuYW1lc3BhY2UKICBncm91cF9pbnRlcnZhbDogNW0KICBncm91cF93YWl0OiAzMHMKICByZWNlaXZlcjogIm51bGwiCiAgcmVwZWF0X2ludGVydmFsOiAxMmgKICByb3V0ZXM6CiAgLSBtYXRjaGVyczoKICAgIC0gYWxlcnRuYW1lID0gIldhdGNoZG9nIgogICAgcmVjZWl2ZXI6ICJudWxsIgp0ZW1wbGF0ZXM6Ci0gL2V0Yy9hbGVydG1hbmFnZXIvY29uZmlnLyoudG1wbA=="
|
alertmanager.yaml: "Z2xvYmFsOgogIHJlc29sdmVfdGltZW91dDogNW0KaW5oaWJpdF9ydWxlczoKLSBlcXVhbDoKICAtIG5hbWVzcGFjZQogIC0gYWxlcnRuYW1lCiAgc291cmNlX21hdGNoZXJzOgogIC0gc2V2ZXJpdHkgPSBjcml0aWNhbAogIHRhcmdldF9tYXRjaGVyczoKICAtIHNldmVyaXR5ID1+IHdhcm5pbmd8aW5mbwotIGVxdWFsOgogIC0gbmFtZXNwYWNlCiAgLSBhbGVydG5hbWUKICBzb3VyY2VfbWF0Y2hlcnM6CiAgLSBzZXZlcml0eSA9IHdhcm5pbmcKICB0YXJnZXRfbWF0Y2hlcnM6CiAgLSBzZXZlcml0eSA9IGluZm8KLSBlcXVhbDoKICAtIG5hbWVzcGFjZQogIHNvdXJjZV9tYXRjaGVyczoKICAtIGFsZXJ0bmFtZSA9IEluZm9JbmhpYml0b3IKICB0YXJnZXRfbWF0Y2hlcnM6CiAgLSBzZXZlcml0eSA9IGluZm8KLSB0YXJnZXRfbWF0Y2hlcnM6CiAgLSBhbGVydG5hbWUgPSBJbmZvSW5oaWJpdG9yCnJlY2VpdmVyczoKLSBuYW1lOiAibnVsbCIKcm91dGU6CiAgZ3JvdXBfYnk6CiAgLSBuYW1lc3BhY2UKICBncm91cF9pbnRlcnZhbDogNW0KICBncm91cF93YWl0OiAzMHMKICByZWNlaXZlcjogIm51bGwiCiAgcmVwZWF0X2ludGVydmFsOiAxMmgKICByb3V0ZXM6CiAgLSBtYXRjaGVyczoKICAgIC0gYWxlcnRuYW1lID0gIldhdGNoZG9nIgogICAgcmVjZWl2ZXI6ICJudWxsIgp0ZW1wbGF0ZXM6Ci0gL2V0Yy9hbGVydG1hbmFnZXIvY29uZmlnLyoudG1wbA=="
|
||||||
@@ -3,7 +3,7 @@
|
|||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: ServiceAccount
|
kind: ServiceAccount
|
||||||
metadata:
|
metadata:
|
||||||
name: prometheus-community-kube-alertmanager
|
name: alertmanager-kube-promethe-alertmanager
|
||||||
namespace: vynil-monitor
|
namespace: vynil-monitor
|
||||||
labels:
|
labels:
|
||||||
app: kube-prometheus-stack-alertmanager
|
app: kube-prometheus-stack-alertmanager
|
||||||
@@ -11,10 +11,10 @@ metadata:
|
|||||||
app.kubernetes.io/component: alertmanager
|
app.kubernetes.io/component: alertmanager
|
||||||
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
app.kubernetes.io/managed-by: Helm
|
||||||
app.kubernetes.io/instance: prometheus-community
|
app.kubernetes.io/instance: alertmanager
|
||||||
app.kubernetes.io/version: "56.0.3"
|
app.kubernetes.io/version: "56.0.3"
|
||||||
app.kubernetes.io/part-of: kube-prometheus-stack
|
app.kubernetes.io/part-of: kube-prometheus-stack
|
||||||
chart: kube-prometheus-stack-56.0.3
|
chart: kube-prometheus-stack-56.0.3
|
||||||
release: "prometheus-community"
|
release: "alertmanager"
|
||||||
heritage: "Helm"
|
heritage: "Helm"
|
||||||
automountServiceAccountToken: true
|
automountServiceAccountToken: true
|
||||||
@@ -0,0 +1,32 @@
|
|||||||
|
# Source: kube-prometheus-stack/templates/alertmanager/service.yaml
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: alertmanager-kube-promethe-alertmanager
|
||||||
|
namespace: vynil-monitor
|
||||||
|
labels:
|
||||||
|
app: kube-prometheus-stack-alertmanager
|
||||||
|
self-monitor: "true"
|
||||||
|
|
||||||
|
app.kubernetes.io/managed-by: Helm
|
||||||
|
app.kubernetes.io/instance: alertmanager
|
||||||
|
app.kubernetes.io/version: "56.0.3"
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus-stack
|
||||||
|
chart: kube-prometheus-stack-56.0.3
|
||||||
|
release: "alertmanager"
|
||||||
|
heritage: "Helm"
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: http-web
|
||||||
|
port: 9093
|
||||||
|
targetPort: 9093
|
||||||
|
protocol: TCP
|
||||||
|
- name: reloader-web
|
||||||
|
appProtocol: http
|
||||||
|
port: 8080
|
||||||
|
targetPort: reloader-web
|
||||||
|
selector:
|
||||||
|
app.kubernetes.io/name: alertmanager
|
||||||
|
alertmanager: alertmanager-kube-promethe-alertmanager
|
||||||
|
sessionAffinity: None
|
||||||
|
type: "ClusterIP"
|
||||||
@@ -6,41 +6,21 @@ metadata:
|
|||||||
name: prometheus
|
name: prometheus
|
||||||
description: null
|
description: null
|
||||||
options:
|
options:
|
||||||
app-group:
|
domain-name:
|
||||||
default: monitor
|
default: your_company.com
|
||||||
examples:
|
examples:
|
||||||
- monitor
|
- your_company.com
|
||||||
type: string
|
|
||||||
replicas:
|
|
||||||
default: 1
|
|
||||||
examples:
|
|
||||||
- 1
|
|
||||||
type: integer
|
|
||||||
listenLocal:
|
|
||||||
default: false
|
|
||||||
examples:
|
|
||||||
- false
|
|
||||||
type: boolean
|
|
||||||
enableAdminAPI:
|
|
||||||
default: false
|
|
||||||
examples:
|
|
||||||
- false
|
|
||||||
type: boolean
|
|
||||||
retention:
|
|
||||||
default: 10d
|
|
||||||
examples:
|
|
||||||
- 10d
|
|
||||||
type: string
|
|
||||||
logLevel:
|
|
||||||
default: info
|
|
||||||
examples:
|
|
||||||
- info
|
|
||||||
type: string
|
type: string
|
||||||
domain:
|
domain:
|
||||||
default: your-company
|
default: your-company
|
||||||
examples:
|
examples:
|
||||||
- your-company
|
- your-company
|
||||||
type: string
|
type: string
|
||||||
|
ingress-class:
|
||||||
|
default: traefik
|
||||||
|
examples:
|
||||||
|
- traefik
|
||||||
|
type: string
|
||||||
images:
|
images:
|
||||||
default:
|
default:
|
||||||
prometheus:
|
prometheus:
|
||||||
@@ -80,30 +60,50 @@ options:
|
|||||||
type: string
|
type: string
|
||||||
type: object
|
type: object
|
||||||
type: object
|
type: object
|
||||||
|
app-group:
|
||||||
|
default: monitor
|
||||||
|
examples:
|
||||||
|
- monitor
|
||||||
|
type: string
|
||||||
|
replicas:
|
||||||
|
default: 1
|
||||||
|
examples:
|
||||||
|
- 1
|
||||||
|
type: integer
|
||||||
shards:
|
shards:
|
||||||
default: 1
|
default: 1
|
||||||
examples:
|
examples:
|
||||||
- 1
|
- 1
|
||||||
type: integer
|
type: integer
|
||||||
domain-name:
|
logLevel:
|
||||||
default: your_company.com
|
default: info
|
||||||
examples:
|
examples:
|
||||||
- your_company.com
|
- info
|
||||||
type: string
|
type: string
|
||||||
issuer:
|
listenLocal:
|
||||||
default: letsencrypt-prod
|
default: false
|
||||||
examples:
|
examples:
|
||||||
- letsencrypt-prod
|
- false
|
||||||
|
type: boolean
|
||||||
|
enableAdminAPI:
|
||||||
|
default: false
|
||||||
|
examples:
|
||||||
|
- false
|
||||||
|
type: boolean
|
||||||
|
retention:
|
||||||
|
default: 10d
|
||||||
|
examples:
|
||||||
|
- 10d
|
||||||
type: string
|
type: string
|
||||||
sub-domain:
|
sub-domain:
|
||||||
default: prometheus
|
default: prometheus
|
||||||
examples:
|
examples:
|
||||||
- prometheus
|
- prometheus
|
||||||
type: string
|
type: string
|
||||||
ingress-class:
|
issuer:
|
||||||
default: traefik
|
default: letsencrypt-prod
|
||||||
examples:
|
examples:
|
||||||
- traefik
|
- letsencrypt-prod
|
||||||
type: string
|
type: string
|
||||||
dependencies:
|
dependencies:
|
||||||
- dist: null
|
- dist: null
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ resource "kubectl_manifest" "prometheus" {
|
|||||||
spec:
|
spec:
|
||||||
image: "${var.images.prometheus.registry}/${var.images.prometheus.repository}:${var.images.prometheus.tag}"
|
image: "${var.images.prometheus.registry}/${var.images.prometheus.repository}:${var.images.prometheus.tag}"
|
||||||
version: ${var.images.prometheus.tag}
|
version: ${var.images.prometheus.tag}
|
||||||
externalUrl: http://prometheus-operated.${var.namespace}:9090
|
externalUrl: http://${var.component}-${var.instance}.${var.namespace}:9090
|
||||||
replicas: ${var.replicas}
|
replicas: ${var.replicas}
|
||||||
shards: ${var.shards}
|
shards: ${var.shards}
|
||||||
logLevel: ${var.logLevel}
|
logLevel: ${var.logLevel}
|
||||||
@@ -23,7 +23,7 @@ resource "kubectl_manifest" "prometheus" {
|
|||||||
alerting:
|
alerting:
|
||||||
alertmanagers:
|
alertmanagers:
|
||||||
- namespace: ${var.namespace}
|
- namespace: ${var.namespace}
|
||||||
name: prometheus-community-kube-alertmanager
|
name: alertmanager-alertmanager
|
||||||
port: http-web
|
port: http-web
|
||||||
pathPrefix: "/"
|
pathPrefix: "/"
|
||||||
apiVersion: v2
|
apiVersion: v2
|
||||||
|
|||||||
Reference in New Issue
Block a user