diff --git a/meta/domain-monitor/apps.tf b/meta/domain-monitor/apps.tf new file mode 100644 index 0000000..7b7d58a --- /dev/null +++ b/meta/domain-monitor/apps.tf @@ -0,0 +1,119 @@ +locals { + annotations = { + "vynil.solidite.fr/meta" = "domain-monitor" + "vynil.solidite.fr/name" = var.namespace + "vynil.solidite.fr/domain" = var.domain-name + "vynil.solidite.fr/issuer" = var.issuer + "vynil.solidite.fr/ingress" = var.ingress-class + } + global = { + "domain" = var.namespace + "domain-name" = "admin.${var.domain-name}" + "issuer" = var.issuer + "ingress-class" = var.ingress-class + "backups" = var.backups + "app-group" = var.app-group + } + prometheus = { for k, v in var.prometheus : k => v if k!="enable" } + alertmanager = { for k, v in var.alertmanager : k => v if k!="enable" } + nodeExporter = { for k, v in var.node-exporter : k => v if k!="enable" } + kubeStateMetrics = merge({"cluster-admin" = true}, { for k, v in var.kube-state-metrics : k => v if k!="enable" }) + monitorControlPlan = merge({"cluster-admin" = true}, { for k, v in var.monitor-control-plan : k => v if k!="enable" }) +} + +resource "kubernetes_namespace_v1" "monitor-ns" { + count = ( var.prometheus.enable || var.alertmanager.enable || var.nodeExporter.enable || var.kubeStateMetrics.enable || var.monitorControlPlan.enable )? 1 : 0 + metadata { + annotations = local.annotations + labels = merge(local.common-labels, local.annotations) + name = "${var.namespace}-monitor" + } +} + +resource "kubectl_manifest" "alertmanager" { + count = var.alertmanager.enable ? 1 : 0 + depends_on = [kubernetes_namespace_v1.monitor-ns] + yaml_body = <<-EOF + nodeExporterVersion: "vynil.solidite.fr/v1" + kind: "Install" + metadata: + name: "alertmanager" + namespace: "${kubernetes_namespace_v1.monitor-ns[0].metadata[0].name}" + labels: ${jsonencode(local.common-labels)} + spec: + distrib: "${var.distributions.domain}" + category: "share" + component: "alertmanager" + options: ${jsonencode(merge(local.global, local.alertmanager))} + EOF +} + +resource "kubectl_manifest" "prometheus" { + count = var.prometheus.enable ? 1 : 0 + yaml_body = <<-EOF + nodeExporterVersion: "vynil.solidite.fr/v1" + kind: "Install" + metadata: + name: "prometheus" + namespace: "${kubernetes_namespace_v1.monitor-ns[0].metadata[0].name}" + labels: ${jsonencode(local.common-labels)} + spec: + distrib: "${var.distributions.domain}" + category: "monitor" + component: "prometheus" + options: ${jsonencode(merge(local.global, local.prometheus))} + EOF +} + +resource "kubectl_manifest" "nodeExporter" { + count = var.nodeExporter.enable ? 1 : 0 + yaml_body = <<-EOF + nodeExporterVersion: "vynil.solidite.fr/v1" + kind: "Install" + metadata: + name: "node-exporter" + namespace: "${kubernetes_namespace_v1.monitor-ns[0].metadata[0].name}" + labels: ${jsonencode(local.common-labels)} + spec: + distrib: "${var.distributions.domain}" + category: "monitor" + component: "k8s-nodeExporter" + options: ${jsonencode(merge(local.global, local.nodeExporter))} + EOF +} + +resource "kubectl_manifest" "kubeStateMetrics" { + count = var.kubeStateMetrics.enable ? 1 : 0 + depends_on = [kubernetes_namespace_v1.monitor-ns] + yaml_body = <<-EOF + nodeExporterVersion: "vynil.solidite.fr/v1" + kind: "Install" + metadata: + name: "kube-state-metrics" + namespace: "${kubernetes_namespace_v1.monitor-ns[0].metadata[0].name}" + labels: ${jsonencode(local.common-labels)} + spec: + distrib: "${var.distributions.domain}" + category: "monitor" + component: "kubeStateMetrics" + options: ${jsonencode(merge(local.global, local.kubeStateMetrics))} + EOF +} + +resource "kubectl_manifest" "monitorControlPlan" { + count = var.monitorControlPlan.enable ? 1 : 0 + depends_on = [kubernetes_namespace_v1.monitor-ns] + yaml_body = <<-EOF + nodeExporterVersion: "vynil.solidite.fr/v1" + kind: "Install" + metadata: + name: "monitor-control-plan" + namespace: "${kubernetes_namespace_v1.monitor-ns[0].metadata[0].name}" + labels: ${jsonencode(local.common-labels)} + spec: + distrib: "${var.distributions.domain}" + category: "monitor" + component: "monitor-control-plan" + options: ${jsonencode(merge(local.global, local.monitorControlPlan))} + EOF +} diff --git a/meta/domain-monitor/index.yaml b/meta/domain-monitor/index.yaml new file mode 100644 index 0000000..cb50c02 --- /dev/null +++ b/meta/domain-monitor/index.yaml @@ -0,0 +1,173 @@ +--- +apiVersion: vinyl.solidite.fr/v1beta1 +kind: Component +category: meta +metadata: + name: domain-monitor + description: null +options: + issuer: + default: letsencrypt-prod + examples: + - letsencrypt-prod + type: string + backups: + default: + enable: false + endpoint: '' + key-id-key: s3-id + secret-key: s3-secret + secret-name: backup-settings + examples: + - enable: false + endpoint: '' + key-id-key: s3-id + secret-key: s3-secret + secret-name: backup-settings + properties: + enable: + default: false + type: boolean + endpoint: + default: '' + type: string + key-id-key: + default: s3-id + type: string + secret-key: + default: s3-secret + type: string + secret-name: + default: backup-settings + type: string + type: object + ingress-class: + default: traefik + examples: + - traefik + type: string + distributions: + default: + core: core + domain: domain + examples: + - core: core + domain: domain + properties: + core: + default: core + type: string + domain: + default: domain + type: string + type: object + domain-name: + default: your_company.com + examples: + - your_company.com + type: string + node-exporter: + default: + enable: true + examples: + - enable: true + properties: + enable: + default: true + type: boolean + type: object + x-vynil-category: monitor + x-vynil-package: node-exporter + kube-state-metrics: + default: + enable: true + examples: + - enable: true + properties: + enable: + default: true + type: boolean + type: object + x-vynil-category: monitor + x-vynil-package: kube-state-metrics + storage-classes: + default: + BlockReadWriteMany: '' + BlockReadWriteOnce: '' + FilesystemReadWriteMany: '' + FilesystemReadWriteOnce: '' + examples: + - BlockReadWriteMany: '' + BlockReadWriteOnce: '' + FilesystemReadWriteMany: '' + FilesystemReadWriteOnce: '' + properties: + BlockReadWriteMany: + default: '' + type: string + BlockReadWriteOnce: + default: '' + type: string + FilesystemReadWriteMany: + default: '' + type: string + FilesystemReadWriteOnce: + default: '' + type: string + type: object + app-group: + default: infra + examples: + - infra + type: string + prometheus: + default: + enable: true + examples: + - enable: true + properties: + enable: + default: true + type: boolean + type: object + x-vynil-category: monitor + x-vynil-package: prometheus + alertmanager: + default: + enable: true + examples: + - enable: true + properties: + enable: + default: true + type: boolean + type: object + x-vynil-category: monitor + x-vynil-package: alertmanager + domain: + default: your-company + examples: + - your-company + type: string + monitor-control-plan: + default: + enable: false + examples: + - enable: false + properties: + enable: + default: false + type: boolean + type: object + x-vynil-category: monitor + x-vynil-package: monitor-control-plan +dependencies: [] +providers: + kubernetes: true + authentik: null + kubectl: true + postgresql: null + restapi: null + http: null + gitea: null +tfaddtype: null diff --git a/meta/domain/index.yaml b/meta/domain/index.yaml index 4d1dbcd..880f0d3 100644 --- a/meta/domain/index.yaml +++ b/meta/domain/index.yaml @@ -6,95 +6,6 @@ metadata: name: domain description: null options: - erp: - default: - dolibarr: - enable: true - enable: false - examples: - - dolibarr: - enable: true - enable: false - properties: - dolibarr: - default: - enable: true - properties: - enable: - default: true - type: boolean - type: object - enable: - default: false - type: boolean - type: object - x-vynil-category: meta - x-vynil-package: domain-erp - ingress-class: - default: traefik - examples: - - traefik - type: string - devspaces: - default: - enable: false - examples: - - enable: false - properties: - enable: - default: false - type: boolean - type: object - x-vynil-category: meta - x-vynil-package: domain-devspaces - auth: - default: - authentik: - enable: true - enable: true - examples: - - authentik: - enable: true - enable: true - properties: - authentik: - default: - enable: true - properties: - enable: - default: true - type: boolean - type: object - enable: - default: true - type: boolean - type: object - x-vynil-category: meta - x-vynil-package: domain-auth - infra: - default: - enable: false - traefik: - enable: false - examples: - - enable: false - traefik: - enable: false - properties: - enable: - default: false - type: boolean - traefik: - default: - enable: false - properties: - enable: - default: false - type: boolean - type: object - type: object - x-vynil-category: meta - x-vynil-package: domain-infra storage-classes: default: BlockReadWriteMany: '' @@ -120,16 +31,115 @@ options: default: '' type: string type: object - domain-name: - default: your_company.com + ci: + default: + enable: false + gitea: + enable: true examples: - - your_company.com - type: string + - enable: false + gitea: + enable: true + properties: + enable: + default: false + type: boolean + gitea: + default: + enable: true + properties: + enable: + default: true + type: boolean + type: object + type: object + x-vynil-category: meta + x-vynil-package: domain-ci + monitor: + default: + enable: false + examples: + - enable: false + properties: + enable: + default: false + type: boolean + type: object + x-vynil-category: meta + x-vynil-package: domain-monitor + distributions: + default: + core: core + domain: domain + examples: + - core: core + domain: domain + properties: + core: + default: core + type: string + domain: + default: domain + type: string + type: object + infra: + default: + enable: false + traefik: + enable: false + examples: + - enable: false + traefik: + enable: false + properties: + enable: + default: false + type: boolean + traefik: + default: + enable: false + properties: + enable: + default: false + type: boolean + type: object + type: object + x-vynil-category: meta + x-vynil-package: domain-infra issuer: default: letsencrypt-prod examples: - letsencrypt-prod type: string + ingress-class: + default: traefik + examples: + - traefik + type: string + erp: + default: + dolibarr: + enable: true + enable: false + examples: + - dolibarr: + enable: true + enable: false + properties: + dolibarr: + default: + enable: true + properties: + enable: + default: true + type: boolean + type: object + enable: + default: false + type: boolean + type: object + x-vynil-category: meta + x-vynil-package: domain-erp apps: default: enable: false @@ -176,6 +186,18 @@ options: type: boolean type: object type: object + devspaces: + default: + enable: false + examples: + - enable: false + properties: + enable: + default: false + type: boolean + type: object + x-vynil-category: meta + x-vynil-package: domain-devspaces backups: default: enable: false @@ -206,35 +228,22 @@ options: default: backup-settings type: string type: object - distributions: - default: - core: core - domain: domain + domain-name: + default: your_company.com examples: - - core: core - domain: domain - properties: - core: - default: core - type: string - domain: - default: domain - type: string - type: object - ci: + - your_company.com + type: string + auth: default: - enable: false - gitea: + authentik: enable: true + enable: true examples: - - enable: false - gitea: + - authentik: enable: true + enable: true properties: - enable: - default: false - type: boolean - gitea: + authentik: default: enable: true properties: @@ -242,9 +251,12 @@ options: default: true type: boolean type: object + enable: + default: true + type: boolean type: object x-vynil-category: meta - x-vynil-package: domain-ci + x-vynil-package: domain-auth dependencies: [] providers: kubernetes: null diff --git a/meta/domain/installs.tf b/meta/domain/installs.tf index cd94d74..7861d15 100644 --- a/meta/domain/installs.tf +++ b/meta/domain/installs.tf @@ -21,6 +21,7 @@ locals { erp = { for k, v in var.erp : k => v if k!="enable" } apps = { for k, v in var.apps : k => v if k!="enable" } mail = { for k, v in var.mail : k => v if k!="enable" } + monitor = { for k, v in var.monitor : k => v if k!="enable" } devspaces = { for k, v in var.devspaces : k => v if k!="enable" } # Force install authentik and it's modules when any are needed @@ -218,6 +219,22 @@ resource "kubectl_manifest" "mail" { options: ${jsonencode(merge(local.global, local.mail))} EOF } +resource "kubectl_manifest" "monitor" { + count = var.monitor.enable ? 1 : 0 + yaml_body = <<-EOF + apiVersion: "vynil.solidite.fr/v1" + kind: "Install" + metadata: + name: "monitor" + namespace: "${var.namespace}" + labels: ${jsonencode(local.common-labels)} + spec: + distrib: "${var.distributions.domain}" + category: "meta" + component: "domain-monitor" + options: ${jsonencode(merge(local.global, local.monitor))} + EOF +} resource "kubectl_manifest" "devspaces" { count = var.devspaces.enable ? 1 : 0 yaml_body = <<-EOF diff --git a/monitor/alertmanager/datas.tf b/monitor/alertmanager/datas.tf new file mode 100644 index 0000000..e9d2ef7 --- /dev/null +++ b/monitor/alertmanager/datas.tf @@ -0,0 +1,165 @@ +locals { + common-labels = { + "vynil.solidite.fr/owner-name" = var.instance + "vynil.solidite.fr/owner-namespace" = var.namespace + "vynil.solidite.fr/owner-category" = var.category + "vynil.solidite.fr/owner-component" = var.component + "app.kubernetes.io/managed-by" = "vynil" + "app.kubernetes.io/instance" = var.instance + } + rb-patch = <<-EOF + - op: replace + path: /subjects/0/namespace + value: "${var.namespace}" + EOF +} + +data "kubernetes_secret_v1" "authentik" { + metadata { + name = "authentik" + namespace = "${var.domain}-auth" + } +} + +data "kustomization_overlay" "data" { + common_labels = local.common-labels + namespace = var.namespace + resources = [for file in fileset(path.module, "*.yaml"): file if file != "index.yaml" && length(regexall("ClusterRole",file))<1] + patches { + target { + kind = "Alertmanager" + name = "prometheus-community-kube-alertmanager" + } + patch = <<-EOF + apiVersion: monitoring.coreos.com/v1 + kind: Alertmanager + metadata: + name: prometheus-community-kube-alertmanager + spec: + image: "${var.images.alertmanager.registry}/${var.images.alertmanager.repository}:${var.images.alertmanager.tag}" + version: ${var.images.alertmanager.tag} + externalUrl: http://prometheus-community-kube-alertmanager.${var.namespace}:9093 + replicas: ${var.replicas} + listenLocal: ${var.listenLocal} + logLevel: "${var.logLevel}" + retention: "${var.retention}" + EOF + } + patches { + target { + kind = "ConfigMap" + name = "alertmanager-kube-grafana-datasource" + } + patch = <<-EOF + apiVersion: v1 + kind: ConfigMap + metadata: + name: alertmanager-kube-grafana-datasource + data: + datasource.yaml: |- + apiVersion: 1 + datasources: + - name: Alertmanager + type: alertmanager + uid: alertmanager + url: http://prometheus-community-kube-alertmanager.${var.namespace}:9093/ + access: proxy + jsonData: + handleGrafanaManagedAlerts: false + implementation: prometheus + EOF + } + patches { + target { + kind = "ServiceMonitor" + name = "prometheus-community-kube-alertmanager" + } + patch = <<-EOF + - op: replace + path: /spec/namespaceSelector/matchNames/0 + value: "${var.namespace}" + EOF + } + patches { + target { + kind = "PrometheusRule" + name = "prometheus-community-kube-alertmanager.rules" + } + patch = <<-EOF +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-alertmanager.rules +spec: + groups: + - name: alertmanager.rules + rules: + - alert: AlertmanagerFailedReload + expr: |- + # Without max_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + max_over_time(alertmanager_config_last_reload_successful{job="prometheus-community-kube-alertmanager",namespace="${var.namespace}"}[5m]) == 0 + - alert: AlertmanagerMembersInconsistent + expr: |- + # Without max_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + max_over_time(alertmanager_cluster_members{job="prometheus-community-kube-alertmanager",namespace="${var.namespace}"}[5m]) + < on (namespace,service,cluster) group_left + count by (namespace,service,cluster) (max_over_time(alertmanager_cluster_members{job="prometheus-community-kube-alertmanager",namespace="${var.namespace}"}[5m])) + - alert: AlertmanagerFailedToSendAlerts + expr: |- + ( + rate(alertmanager_notifications_failed_total{job="prometheus-community-kube-alertmanager",namespace="${var.namespace}"}[5m]) + / + ignoring (reason) group_left rate(alertmanager_notifications_total{job="prometheus-community-kube-alertmanager",namespace="${var.namespace}"}[5m]) + ) + > 0.01 + - alert: AlertmanagerClusterFailedToSendAlerts + expr: |- + min by (namespace,service, integration) ( + rate(alertmanager_notifications_failed_total{job="prometheus-community-kube-alertmanager",namespace="${var.namespace}", integration=~`.*`}[5m]) + / + ignoring (reason) group_left rate(alertmanager_notifications_total{job="prometheus-community-kube-alertmanager",namespace="${var.namespace}", integration=~`.*`}[5m]) + ) + > 0.01 + - alert: AlertmanagerClusterFailedToSendAlerts + expr: |- + min by (namespace,service, integration) ( + rate(alertmanager_notifications_failed_total{job="prometheus-community-kube-alertmanager",namespace="${var.namespace}", integration!~`.*`}[5m]) + / + ignoring (reason) group_left rate(alertmanager_notifications_total{job="prometheus-community-kube-alertmanager",namespace="${var.namespace}", integration!~`.*`}[5m]) + ) + > 0.01 + - alert: AlertmanagerConfigInconsistent + expr: |- + count by (namespace,service,cluster) ( + count_values by (namespace,service,cluster) ("config_hash", alertmanager_config_hash{job="prometheus-community-kube-alertmanager",namespace="${var.namespace}"}) + ) + != 1 + - alert: AlertmanagerClusterDown + expr: |- + ( + count by (namespace,service,cluster) ( + avg_over_time(up{job="prometheus-community-kube-alertmanager",namespace="${var.namespace}"}[5m]) < 0.5 + ) + / + count by (namespace,service,cluster) ( + up{job="prometheus-community-kube-alertmanager",namespace="${var.namespace}"} + ) + ) + >= 0.5 + - alert: AlertmanagerClusterCrashlooping + expr: |- + ( + count by (namespace,service,cluster) ( + changes(process_start_time_seconds{job="prometheus-community-kube-alertmanager",namespace="${var.namespace}"}[10m]) > 4 + ) + / + count by (namespace,service,cluster) ( + up{job="prometheus-community-kube-alertmanager",namespace="${var.namespace}"} + ) + ) + >= 0.5 + EOF + } +} diff --git a/monitor/alertmanager/index.yaml b/monitor/alertmanager/index.yaml new file mode 100644 index 0000000..dee1af6 --- /dev/null +++ b/monitor/alertmanager/index.yaml @@ -0,0 +1,102 @@ +--- +apiVersion: vinyl.solidite.fr/v1beta1 +kind: Component +category: monitor +metadata: + name: alertmanager + description: null +options: + retention: + default: 120h + examples: + - 120h + type: string + images: + default: + alertmanager: + pullPolicy: IfNotPresent + registry: quay.io + repository: prometheus/alertmanager + tag: v0.26.0 + examples: + - alertmanager: + pullPolicy: IfNotPresent + registry: quay.io + repository: prometheus/alertmanager + tag: v0.26.0 + properties: + alertmanager: + default: + pullPolicy: IfNotPresent + registry: quay.io + repository: prometheus/alertmanager + tag: v0.26.0 + properties: + pullPolicy: + default: IfNotPresent + enum: + - Always + - Never + - IfNotPresent + type: string + registry: + default: quay.io + type: string + repository: + default: prometheus/alertmanager + type: string + tag: + default: v0.26.0 + type: string + type: object + type: object + domain: + default: your-company + examples: + - your-company + type: string + listenLocal: + default: false + examples: + - false + type: boolean + logLevel: + default: info + examples: + - info + type: string + sub-domain: + default: to-be-set + examples: + - to-be-set + type: string + issuer: + default: letsencrypt-prod + examples: + - letsencrypt-prod + type: string + ingress-class: + default: traefik + examples: + - traefik + type: string + domain-name: + default: your_company.com + examples: + - your_company.com + type: string + replicas: + default: 1 + examples: + - 1 + type: integer +dependencies: [] +providers: + kubernetes: true + authentik: true + kubectl: true + postgresql: null + restapi: null + http: null + gitea: null +tfaddtype: null diff --git a/monitor/alertmanager/monitoring.coreos.com_v1_Alertmanager_prometheus-community-kube-alertmanager.yaml b/monitor/alertmanager/monitoring.coreos.com_v1_Alertmanager_prometheus-community-kube-alertmanager.yaml new file mode 100644 index 0000000..4d6a6f3 --- /dev/null +++ b/monitor/alertmanager/monitoring.coreos.com_v1_Alertmanager_prometheus-community-kube-alertmanager.yaml @@ -0,0 +1,38 @@ +# Source: kube-prometheus-stack/templates/alertmanager/alertmanager.yaml +apiVersion: monitoring.coreos.com/v1 +kind: Alertmanager +metadata: + name: prometheus-community-kube-alertmanager + namespace: vynil-monitor + labels: + app: kube-prometheus-stack-alertmanager + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + image: "quay.io/prometheus/alertmanager:v0.26.0" + version: v0.26.0 + replicas: 1 + listenLocal: false + serviceAccountName: prometheus-community-kube-alertmanager + externalUrl: http://prometheus-community-kube-alertmanager.vynil-monitor:9093 + paused: false + logFormat: "logfmt" + logLevel: "info" + retention: "120h" + alertmanagerConfigSelector: {} + alertmanagerConfigNamespaceSelector: {} + routePrefix: "/" + securityContext: + fsGroup: 2000 + runAsGroup: 2000 + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + portName: http-web \ No newline at end of file diff --git a/monitor/alertmanager/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-alertmanager.rules.yaml b/monitor/alertmanager/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-alertmanager.rules.yaml new file mode 100644 index 0000000..25b7622 --- /dev/null +++ b/monitor/alertmanager/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-alertmanager.rules.yaml @@ -0,0 +1,142 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/alertmanager.rules.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-alertmanager.rules + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - name: alertmanager.rules + rules: + - alert: AlertmanagerFailedReload + annotations: + description: Configuration has failed to load for {{ $labels.namespace }}/{{ $labels.pod}}. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerfailedreload + summary: Reloading an Alertmanager configuration has failed. + expr: |- + # Without max_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + max_over_time(alertmanager_config_last_reload_successful{job="prometheus-community-kube-alertmanager",namespace="vynil-monitor"}[5m]) == 0 + for: 10m + labels: + severity: critical + - alert: AlertmanagerMembersInconsistent + annotations: + description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} has only found {{ $value }} members of the {{$labels.job}} cluster. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagermembersinconsistent + summary: A member of an Alertmanager cluster has not found all other cluster members. + expr: |- + # Without max_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + max_over_time(alertmanager_cluster_members{job="prometheus-community-kube-alertmanager",namespace="vynil-monitor"}[5m]) + < on (namespace,service,cluster) group_left + count by (namespace,service,cluster) (max_over_time(alertmanager_cluster_members{job="prometheus-community-kube-alertmanager",namespace="vynil-monitor"}[5m])) + for: 15m + labels: + severity: critical + - alert: AlertmanagerFailedToSendAlerts + annotations: + description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} failed to send {{ $value | humanizePercentage }} of notifications to {{ $labels.integration }}. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerfailedtosendalerts + summary: An Alertmanager instance failed to send notifications. + expr: |- + ( + rate(alertmanager_notifications_failed_total{job="prometheus-community-kube-alertmanager",namespace="vynil-monitor"}[5m]) + / + ignoring (reason) group_left rate(alertmanager_notifications_total{job="prometheus-community-kube-alertmanager",namespace="vynil-monitor"}[5m]) + ) + > 0.01 + for: 5m + labels: + severity: warning + - alert: AlertmanagerClusterFailedToSendAlerts + annotations: + description: The minimum notification failure rate to {{ $labels.integration }} sent from any instance in the {{$labels.job}} cluster is {{ $value | humanizePercentage }}. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterfailedtosendalerts + summary: All Alertmanager instances in a cluster failed to send notifications to a critical integration. + expr: |- + min by (namespace,service, integration) ( + rate(alertmanager_notifications_failed_total{job="prometheus-community-kube-alertmanager",namespace="vynil-monitor", integration=~`.*`}[5m]) + / + ignoring (reason) group_left rate(alertmanager_notifications_total{job="prometheus-community-kube-alertmanager",namespace="vynil-monitor", integration=~`.*`}[5m]) + ) + > 0.01 + for: 5m + labels: + severity: critical + - alert: AlertmanagerClusterFailedToSendAlerts + annotations: + description: The minimum notification failure rate to {{ $labels.integration }} sent from any instance in the {{$labels.job}} cluster is {{ $value | humanizePercentage }}. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterfailedtosendalerts + summary: All Alertmanager instances in a cluster failed to send notifications to a non-critical integration. + expr: |- + min by (namespace,service, integration) ( + rate(alertmanager_notifications_failed_total{job="prometheus-community-kube-alertmanager",namespace="vynil-monitor", integration!~`.*`}[5m]) + / + ignoring (reason) group_left rate(alertmanager_notifications_total{job="prometheus-community-kube-alertmanager",namespace="vynil-monitor", integration!~`.*`}[5m]) + ) + > 0.01 + for: 5m + labels: + severity: warning + - alert: AlertmanagerConfigInconsistent + annotations: + description: Alertmanager instances within the {{$labels.job}} cluster have different configurations. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerconfiginconsistent + summary: Alertmanager instances within the same cluster have different configurations. + expr: |- + count by (namespace,service,cluster) ( + count_values by (namespace,service,cluster) ("config_hash", alertmanager_config_hash{job="prometheus-community-kube-alertmanager",namespace="vynil-monitor"}) + ) + != 1 + for: 20m + labels: + severity: critical + - alert: AlertmanagerClusterDown + annotations: + description: '{{ $value | humanizePercentage }} of Alertmanager instances within the {{$labels.job}} cluster have been up for less than half of the last 5m.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterdown + summary: Half or more of the Alertmanager instances within the same cluster are down. + expr: |- + ( + count by (namespace,service,cluster) ( + avg_over_time(up{job="prometheus-community-kube-alertmanager",namespace="vynil-monitor"}[5m]) < 0.5 + ) + / + count by (namespace,service,cluster) ( + up{job="prometheus-community-kube-alertmanager",namespace="vynil-monitor"} + ) + ) + >= 0.5 + for: 5m + labels: + severity: critical + - alert: AlertmanagerClusterCrashlooping + annotations: + description: '{{ $value | humanizePercentage }} of Alertmanager instances within the {{$labels.job}} cluster have restarted at least 5 times in the last 10m.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclustercrashlooping + summary: Half or more of the Alertmanager instances within the same cluster are crashlooping. + expr: |- + ( + count by (namespace,service,cluster) ( + changes(process_start_time_seconds{job="prometheus-community-kube-alertmanager",namespace="vynil-monitor"}[10m]) > 4 + ) + / + count by (namespace,service,cluster) ( + up{job="prometheus-community-kube-alertmanager",namespace="vynil-monitor"} + ) + ) + >= 0.5 + for: 5m + labels: + severity: critical \ No newline at end of file diff --git a/monitor/alertmanager/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-alertmanager.yaml b/monitor/alertmanager/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-alertmanager.yaml new file mode 100644 index 0000000..5934e2f --- /dev/null +++ b/monitor/alertmanager/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-alertmanager.yaml @@ -0,0 +1,33 @@ +# Source: kube-prometheus-stack/templates/alertmanager/servicemonitor.yaml +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: prometheus-community-kube-alertmanager + namespace: vynil-monitor + labels: + app: kube-prometheus-stack-alertmanager + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + + selector: + matchLabels: + app: kube-prometheus-stack-alertmanager + release: "prometheus-community" + self-monitor: "true" + namespaceSelector: + matchNames: + - "vynil-monitor" + endpoints: + - port: http-web + enableHttp2: true + path: "/metrics" + - port: reloader-web + scheme: http + path: "/metrics" \ No newline at end of file diff --git a/monitor/alertmanager/presentation.tf b/monitor/alertmanager/presentation.tf new file mode 100644 index 0000000..8063a28 --- /dev/null +++ b/monitor/alertmanager/presentation.tf @@ -0,0 +1,75 @@ +locals { + dns-name = "${var.sub-domain}.${var.domain-name}" + dns-names = [local.dns-name] + app-name = var.component == var.instance ? var.instance : format("%s-%s", var.component, var.instance) + icon = "icon.svg" + request_headers = { + "Content-Type" = "application/json" + Authorization = "Bearer ${data.kubernetes_secret_v1.authentik.data["AUTHENTIK_BOOTSTRAP_TOKEN"]}" + } + service = { + "name" = "prometheus-community-kube-prometheus" + "port" = { + "number" = 9090 + } + } +} + +module "ingress" { + source = "git::https://git.solidite.fr/vynil/kydah-modules.git//ingress" + component = "" + instance = var.instance + namespace = var.namespace + issuer = var.issuer + ingress_class = var.ingress-class + labels = local.common-labels + dns_names = local.dns-names + middlewares = ["forward-${local.app-name}"] + services = [local.service] + providers = { + kubectl = kubectl + } +} + +module "application" { + source = "git::https://git.solidite.fr/vynil/kydah-modules.git//application" + component = var.component + instance = var.instance + app_group = var.app-group + dns_name = local.dns-name + icon = local.icon + protocol_provider = module.forward.provider-id + providers = { + authentik = authentik + } +} + +provider "restapi" { + uri = "http://authentik.${var.domain}-auth.svc/api/v3/" + headers = local.request_headers + create_method = "PATCH" + update_method = "PATCH" + destroy_method = "PATCH" + write_returns_object = true + id_attribute = "name" +} + +module "forward" { + source = "git::https://git.solidite.fr/vynil/kydah-modules.git//forward" + component = var.component + instance = var.instance + domain = var.domain + namespace = var.namespace + ingress_class = var.ingress-class + labels = local.common-labels + dns_names = local.dns-names + service = local.service + icon = local.icon + request_headers = local.request_headers + providers = { + restapi = restapi + http = http + kubectl = kubectl + authentik = authentik + } +} diff --git a/monitor/alertmanager/v1_ConfigMap_alertmanager-kube-grafana-datasource.yaml b/monitor/alertmanager/v1_ConfigMap_alertmanager-kube-grafana-datasource.yaml new file mode 100644 index 0000000..d746a94 --- /dev/null +++ b/monitor/alertmanager/v1_ConfigMap_alertmanager-kube-grafana-datasource.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: alertmanager-kube-grafana-datasource + labels: + grafana_datasource: "1" + app: alertmanager +data: + datasource.yaml: |- + apiVersion: 1 + datasources: + - name: Alertmanager + type: alertmanager + uid: alertmanager + url: http://prometheus-community-kube-alertmanager.vynil-monitor:9093/ + access: proxy + jsonData: + handleGrafanaManagedAlerts: false + implementation: prometheus \ No newline at end of file diff --git a/monitor/alertmanager/v1_Secret_alertmanager-prometheus-community-kube-alertmanager.yaml b/monitor/alertmanager/v1_Secret_alertmanager-prometheus-community-kube-alertmanager.yaml new file mode 100644 index 0000000..83a8be8 --- /dev/null +++ b/monitor/alertmanager/v1_Secret_alertmanager-prometheus-community-kube-alertmanager.yaml @@ -0,0 +1,18 @@ +# Source: kube-prometheus-stack/templates/alertmanager/secret.yaml +apiVersion: v1 +kind: Secret +metadata: + name: alertmanager-prometheus-community-kube-alertmanager + namespace: vynil-monitor + labels: + app: kube-prometheus-stack-alertmanager + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +data: + alertmanager.yaml: "Z2xvYmFsOgogIHJlc29sdmVfdGltZW91dDogNW0KaW5oaWJpdF9ydWxlczoKLSBlcXVhbDoKICAtIG5hbWVzcGFjZQogIC0gYWxlcnRuYW1lCiAgc291cmNlX21hdGNoZXJzOgogIC0gc2V2ZXJpdHkgPSBjcml0aWNhbAogIHRhcmdldF9tYXRjaGVyczoKICAtIHNldmVyaXR5ID1+IHdhcm5pbmd8aW5mbwotIGVxdWFsOgogIC0gbmFtZXNwYWNlCiAgLSBhbGVydG5hbWUKICBzb3VyY2VfbWF0Y2hlcnM6CiAgLSBzZXZlcml0eSA9IHdhcm5pbmcKICB0YXJnZXRfbWF0Y2hlcnM6CiAgLSBzZXZlcml0eSA9IGluZm8KLSBlcXVhbDoKICAtIG5hbWVzcGFjZQogIHNvdXJjZV9tYXRjaGVyczoKICAtIGFsZXJ0bmFtZSA9IEluZm9JbmhpYml0b3IKICB0YXJnZXRfbWF0Y2hlcnM6CiAgLSBzZXZlcml0eSA9IGluZm8KLSB0YXJnZXRfbWF0Y2hlcnM6CiAgLSBhbGVydG5hbWUgPSBJbmZvSW5oaWJpdG9yCnJlY2VpdmVyczoKLSBuYW1lOiAibnVsbCIKcm91dGU6CiAgZ3JvdXBfYnk6CiAgLSBuYW1lc3BhY2UKICBncm91cF9pbnRlcnZhbDogNW0KICBncm91cF93YWl0OiAzMHMKICByZWNlaXZlcjogIm51bGwiCiAgcmVwZWF0X2ludGVydmFsOiAxMmgKICByb3V0ZXM6CiAgLSBtYXRjaGVyczoKICAgIC0gYWxlcnRuYW1lID0gIldhdGNoZG9nIgogICAgcmVjZWl2ZXI6ICJudWxsIgp0ZW1wbGF0ZXM6Ci0gL2V0Yy9hbGVydG1hbmFnZXIvY29uZmlnLyoudG1wbA==" \ No newline at end of file diff --git a/monitor/alertmanager/v1_ServiceAccount_prometheus-community-kube-alertmanager.yaml b/monitor/alertmanager/v1_ServiceAccount_prometheus-community-kube-alertmanager.yaml new file mode 100644 index 0000000..0067dcc --- /dev/null +++ b/monitor/alertmanager/v1_ServiceAccount_prometheus-community-kube-alertmanager.yaml @@ -0,0 +1,20 @@ +--- +# Source: kube-prometheus-stack/templates/alertmanager/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: prometheus-community-kube-alertmanager + namespace: vynil-monitor + labels: + app: kube-prometheus-stack-alertmanager + app.kubernetes.io/name: kube-prometheus-stack-alertmanager + app.kubernetes.io/component: alertmanager + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +automountServiceAccountToken: true \ No newline at end of file diff --git a/monitor/alertmanager/v1_Service_prometheus-community-kube-alertmanager.yaml b/monitor/alertmanager/v1_Service_prometheus-community-kube-alertmanager.yaml new file mode 100644 index 0000000..949280b --- /dev/null +++ b/monitor/alertmanager/v1_Service_prometheus-community-kube-alertmanager.yaml @@ -0,0 +1,32 @@ +# Source: kube-prometheus-stack/templates/alertmanager/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: prometheus-community-kube-alertmanager + namespace: vynil-monitor + labels: + app: kube-prometheus-stack-alertmanager + self-monitor: "true" + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + ports: + - name: http-web + port: 9093 + targetPort: 9093 + protocol: TCP + - name: reloader-web + appProtocol: http + port: 8080 + targetPort: reloader-web + selector: + app.kubernetes.io/name: alertmanager + alertmanager: prometheus-community-kube-alertmanager + sessionAffinity: None + type: "ClusterIP" \ No newline at end of file diff --git a/monitor/kube-state-metrics/apps_v1_Deployment_prometheus-community-kube-state-metrics.yaml b/monitor/kube-state-metrics/apps_v1_Deployment_prometheus-community-kube-state-metrics.yaml new file mode 100644 index 0000000..885fe95 --- /dev/null +++ b/monitor/kube-state-metrics/apps_v1_Deployment_prometheus-community-kube-state-metrics.yaml @@ -0,0 +1,82 @@ +# Source: kube-prometheus-stack/charts/kube-state-metrics/templates/deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: prometheus-community-kube-state-metrics + namespace: vynil-monitor + labels: + helm.sh/chart: kube-state-metrics-5.16.0 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: metrics + app.kubernetes.io/part-of: kube-state-metrics + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "2.10.1" + release: prometheus-community +spec: + selector: + matchLabels: + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/instance: prometheus-community + replicas: 1 + strategy: + type: RollingUpdate + revisionHistoryLimit: 10 + template: + metadata: + labels: + helm.sh/chart: kube-state-metrics-5.16.0 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: metrics + app.kubernetes.io/part-of: kube-state-metrics + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "2.10.1" + release: prometheus-community + spec: + hostNetwork: false + serviceAccountName: prometheus-community-kube-state-metrics + securityContext: + fsGroup: 65534 + runAsGroup: 65534 + runAsNonRoot: true + runAsUser: 65534 + seccompProfile: + type: RuntimeDefault + containers: + - name: kube-state-metrics + args: + - --port=8080 + - --resources=certificatesigningrequests,configmaps,cronjobs,daemonsets,deployments,endpoints,horizontalpodautoscalers,ingresses,jobs,leases,limitranges,mutatingwebhookconfigurations,namespaces,networkpolicies,nodes,persistentvolumeclaims,persistentvolumes,poddisruptionbudgets,pods,replicasets,replicationcontrollers,resourcequotas,secrets,services,statefulsets,storageclasses,validatingwebhookconfigurations,volumeattachments + imagePullPolicy: IfNotPresent + image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.10.1 + ports: + - containerPort: 8080 + name: "http" + livenessProbe: + failureThreshold: 3 + httpGet: + httpHeaders: + path: /healthz + port: 8080 + scheme: HTTP + initialDelaySeconds: 5 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 5 + readinessProbe: + failureThreshold: 3 + httpGet: + httpHeaders: + path: / + port: 8080 + scheme: HTTP + initialDelaySeconds: 5 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 5 + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL \ No newline at end of file diff --git a/monitor/kube-state-metrics/datas.tf b/monitor/kube-state-metrics/datas.tf new file mode 100644 index 0000000..e8616b6 --- /dev/null +++ b/monitor/kube-state-metrics/datas.tf @@ -0,0 +1,38 @@ +locals { + common-labels = { + "vynil.solidite.fr/owner-name" = var.instance + "vynil.solidite.fr/owner-namespace" = var.namespace + "vynil.solidite.fr/owner-category" = var.category + "vynil.solidite.fr/owner-component" = var.component + "app.kubernetes.io/managed-by" = "vynil" + "app.kubernetes.io/instance" = var.instance + } + rb-patch = <<-EOF + - op: replace + path: /subjects/0/namespace + value: "${var.namespace}" + EOF +} + +data "kustomization_overlay" "data" { + common_labels = local.common-labels + namespace = var.namespace + resources = [for file in fileset(path.module, "*.yaml"): file if file != "index.yaml" && length(regexall("ClusterRole",file))<1] + images { + name = "registry.k8s.io/kube-state-metrics/kube-state-metrics" + new_name = "${var.images.kube-state-metrics.registry}/${var.images.kube-state-metrics.repository}" + new_tag = "${var.images.kube-state-metrics.tag}" + } +} + +data "kustomization_overlay" "data_no_ns" { + common_labels = local.common-labels + resources = [for file in fileset(path.module, "*.yaml"): file if length(regexall("ClusterRole",file))>0] + patches { + target { + kind = "ClusterRoleBinding" + name = "prometheus-community-kube-state-metrics" + } + patch = local.rb-patch + } +} diff --git a/monitor/kube-state-metrics/index.yaml b/monitor/kube-state-metrics/index.yaml new file mode 100644 index 0000000..11a8a72 --- /dev/null +++ b/monitor/kube-state-metrics/index.yaml @@ -0,0 +1,57 @@ +--- +apiVersion: vinyl.solidite.fr/v1beta1 +kind: Component +category: monitor +metadata: + name: kube-state-metrics + description: null +options: + images: + default: + kube-state-metrics: + pullPolicy: IfNotPresent + registry: registry.k8s.io + repository: kube-state-metrics/kube-state-metrics + tag: v2.10.1 + examples: + - kube-state-metrics: + pullPolicy: IfNotPresent + registry: registry.k8s.io + repository: kube-state-metrics/kube-state-metrics + tag: v2.10.1 + properties: + kube-state-metrics: + default: + pullPolicy: IfNotPresent + registry: registry.k8s.io + repository: kube-state-metrics/kube-state-metrics + tag: v2.10.1 + properties: + pullPolicy: + default: IfNotPresent + enum: + - Always + - Never + - IfNotPresent + type: string + registry: + default: registry.k8s.io + type: string + repository: + default: kube-state-metrics/kube-state-metrics + type: string + tag: + default: v2.10.1 + type: string + type: object + type: object +dependencies: [] +providers: + kubernetes: true + authentik: null + kubectl: true + postgresql: null + restapi: null + http: null + gitea: null +tfaddtype: null diff --git a/monitor/kube-state-metrics/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-state-metrics.yaml b/monitor/kube-state-metrics/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-state-metrics.yaml new file mode 100644 index 0000000..11b23d4 --- /dev/null +++ b/monitor/kube-state-metrics/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-state-metrics.yaml @@ -0,0 +1,68 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kube-state-metrics.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-kube-state-metrics + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - name: kube-state-metrics + rules: + - alert: KubeStateMetricsListErrors + annotations: + description: kube-state-metrics is experiencing errors at an elevated rate in list operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricslisterrors + summary: kube-state-metrics is experiencing errors in list operations. + expr: |- + (sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m])) by (cluster) + / + sum(rate(kube_state_metrics_list_total{job="kube-state-metrics"}[5m])) by (cluster)) + > 0.01 + for: 15m + labels: + severity: critical + - alert: KubeStateMetricsWatchErrors + annotations: + description: kube-state-metrics is experiencing errors at an elevated rate in watch operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricswatcherrors + summary: kube-state-metrics is experiencing errors in watch operations. + expr: |- + (sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics",result="error"}[5m])) by (cluster) + / + sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics"}[5m])) by (cluster)) + > 0.01 + for: 15m + labels: + severity: critical + - alert: KubeStateMetricsShardingMismatch + annotations: + description: kube-state-metrics pods are running with different --total-shards configuration, some Kubernetes objects may be exposed multiple times or not exposed at all. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricsshardingmismatch + summary: kube-state-metrics sharding is misconfigured. + expr: stdvar (kube_state_metrics_total_shards{job="kube-state-metrics"}) by (cluster) != 0 + for: 15m + labels: + severity: critical + - alert: KubeStateMetricsShardsMissing + annotations: + description: kube-state-metrics shards are missing, some Kubernetes objects are not being exposed. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricsshardsmissing + summary: kube-state-metrics shards are missing. + expr: |- + 2^max(kube_state_metrics_total_shards{job="kube-state-metrics"}) by (cluster) - 1 + - + sum( 2 ^ max by (cluster, shard_ordinal) (kube_state_metrics_shard_ordinal{job="kube-state-metrics"}) ) by (cluster) + != 0 + for: 15m + labels: + severity: critical \ No newline at end of file diff --git a/monitor/kube-state-metrics/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-state-metrics.yaml b/monitor/kube-state-metrics/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-state-metrics.yaml new file mode 100644 index 0000000..8b0d36c --- /dev/null +++ b/monitor/kube-state-metrics/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-state-metrics.yaml @@ -0,0 +1,24 @@ +# Source: kube-prometheus-stack/charts/kube-state-metrics/templates/servicemonitor.yaml +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: prometheus-community-kube-state-metrics + namespace: vynil-monitor + labels: + helm.sh/chart: kube-state-metrics-5.16.0 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: metrics + app.kubernetes.io/part-of: kube-state-metrics + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "2.10.1" + release: prometheus-community +spec: + jobLabel: app.kubernetes.io/name + selector: + matchLabels: + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/instance: prometheus-community + endpoints: + - port: http + honorLabels: true \ No newline at end of file diff --git a/monitor/kube-state-metrics/rbac.authorization.k8s.io_v1_ClusterRoleBinding_prometheus-community-kube-state-metrics.yaml b/monitor/kube-state-metrics/rbac.authorization.k8s.io_v1_ClusterRoleBinding_prometheus-community-kube-state-metrics.yaml new file mode 100644 index 0000000..5c64332 --- /dev/null +++ b/monitor/kube-state-metrics/rbac.authorization.k8s.io_v1_ClusterRoleBinding_prometheus-community-kube-state-metrics.yaml @@ -0,0 +1,22 @@ +# Source: kube-prometheus-stack/charts/kube-state-metrics/templates/clusterrolebinding.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + helm.sh/chart: kube-state-metrics-5.16.0 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: metrics + app.kubernetes.io/part-of: kube-state-metrics + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "2.10.1" + release: prometheus-community + name: prometheus-community-kube-state-metrics +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: prometheus-community-kube-state-metrics +subjects: +- kind: ServiceAccount + name: prometheus-community-kube-state-metrics + namespace: vynil-monitor \ No newline at end of file diff --git a/monitor/kube-state-metrics/rbac.authorization.k8s.io_v1_ClusterRole_prometheus-community-kube-state-metrics.yaml b/monitor/kube-state-metrics/rbac.authorization.k8s.io_v1_ClusterRole_prometheus-community-kube-state-metrics.yaml new file mode 100644 index 0000000..a68c2a7 --- /dev/null +++ b/monitor/kube-state-metrics/rbac.authorization.k8s.io_v1_ClusterRole_prometheus-community-kube-state-metrics.yaml @@ -0,0 +1,155 @@ +# Source: kube-prometheus-stack/charts/kube-state-metrics/templates/role.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + helm.sh/chart: kube-state-metrics-5.16.0 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: metrics + app.kubernetes.io/part-of: kube-state-metrics + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "2.10.1" + release: prometheus-community + name: prometheus-community-kube-state-metrics +rules: + +- apiGroups: ["certificates.k8s.io"] + resources: + - certificatesigningrequests + verbs: ["list", "watch"] + +- apiGroups: [""] + resources: + - configmaps + verbs: ["list", "watch"] + +- apiGroups: ["batch"] + resources: + - cronjobs + verbs: ["list", "watch"] + +- apiGroups: ["extensions", "apps"] + resources: + - daemonsets + verbs: ["list", "watch"] + +- apiGroups: ["extensions", "apps"] + resources: + - deployments + verbs: ["list", "watch"] + +- apiGroups: [""] + resources: + - endpoints + verbs: ["list", "watch"] + +- apiGroups: ["autoscaling"] + resources: + - horizontalpodautoscalers + verbs: ["list", "watch"] + +- apiGroups: ["extensions", "networking.k8s.io"] + resources: + - ingresses + verbs: ["list", "watch"] + +- apiGroups: ["batch"] + resources: + - jobs + verbs: ["list", "watch"] + +- apiGroups: ["coordination.k8s.io"] + resources: + - leases + verbs: ["list", "watch"] + +- apiGroups: [""] + resources: + - limitranges + verbs: ["list", "watch"] + +- apiGroups: ["admissionregistration.k8s.io"] + resources: + - mutatingwebhookconfigurations + verbs: ["list", "watch"] + +- apiGroups: [""] + resources: + - namespaces + verbs: ["list", "watch"] + +- apiGroups: ["networking.k8s.io"] + resources: + - networkpolicies + verbs: ["list", "watch"] + +- apiGroups: [""] + resources: + - nodes + verbs: ["list", "watch"] + +- apiGroups: [""] + resources: + - persistentvolumeclaims + verbs: ["list", "watch"] + +- apiGroups: [""] + resources: + - persistentvolumes + verbs: ["list", "watch"] + +- apiGroups: ["policy"] + resources: + - poddisruptionbudgets + verbs: ["list", "watch"] + +- apiGroups: [""] + resources: + - pods + verbs: ["list", "watch"] + +- apiGroups: ["extensions", "apps"] + resources: + - replicasets + verbs: ["list", "watch"] + +- apiGroups: [""] + resources: + - replicationcontrollers + verbs: ["list", "watch"] + +- apiGroups: [""] + resources: + - resourcequotas + verbs: ["list", "watch"] + +- apiGroups: [""] + resources: + - secrets + verbs: ["list", "watch"] + +- apiGroups: [""] + resources: + - services + verbs: ["list", "watch"] + +- apiGroups: ["apps"] + resources: + - statefulsets + verbs: ["list", "watch"] + +- apiGroups: ["storage.k8s.io"] + resources: + - storageclasses + verbs: ["list", "watch"] + +- apiGroups: ["admissionregistration.k8s.io"] + resources: + - validatingwebhookconfigurations + verbs: ["list", "watch"] + +- apiGroups: ["storage.k8s.io"] + resources: + - volumeattachments + verbs: ["list", "watch"] \ No newline at end of file diff --git a/monitor/kube-state-metrics/ressources_no_ns.tf b/monitor/kube-state-metrics/ressources_no_ns.tf new file mode 100644 index 0000000..9fa58b7 --- /dev/null +++ b/monitor/kube-state-metrics/ressources_no_ns.tf @@ -0,0 +1,45 @@ + +# first loop through resources in ids_prio[0] +resource "kustomization_resource" "pre_no_ns" { + for_each = data.kustomization_overlay.data_no_ns.ids_prio[0] + + manifest = ( + contains(["_/Secret"], regex("(?P.*/.*)/.*/.*", each.value)["group_kind"]) + ? sensitive(data.kustomization_overlay.data_no_ns.manifests[each.value]) + : data.kustomization_overlay.data_no_ns.manifests[each.value] + ) +} + +# then loop through resources in ids_prio[1] +# and set an explicit depends_on on kustomization_resource.pre +# wait 2 minutes for any deployment or daemonset to become ready +resource "kustomization_resource" "main_no_ns" { + for_each = data.kustomization_overlay.data_no_ns.ids_prio[1] + + manifest = ( + contains(["_/Secret"], regex("(?P.*/.*)/.*/.*", each.value)["group_kind"]) + ? sensitive(data.kustomization_overlay.data_no_ns.manifests[each.value]) + : data.kustomization_overlay.data_no_ns.manifests[each.value] + ) + wait = true + timeouts { + create = "5m" + update = "5m" + } + + depends_on = [kustomization_resource.pre_no_ns] +} + +# finally, loop through resources in ids_prio[2] +# and set an explicit depends_on on kustomization_resource.main +resource "kustomization_resource" "post_no_ns" { + for_each = data.kustomization_overlay.data_no_ns.ids_prio[2] + + manifest = ( + contains(["_/Secret"], regex("(?P.*/.*)/.*/.*", each.value)["group_kind"]) + ? sensitive(data.kustomization_overlay.data_no_ns.manifests[each.value]) + : data.kustomization_overlay.data_no_ns.manifests[each.value] + ) + + depends_on = [kustomization_resource.main_no_ns] +} diff --git a/monitor/kube-state-metrics/v1_ServiceAccount_prometheus-community-kube-state-metrics.yaml b/monitor/kube-state-metrics/v1_ServiceAccount_prometheus-community-kube-state-metrics.yaml new file mode 100644 index 0000000..5178819 --- /dev/null +++ b/monitor/kube-state-metrics/v1_ServiceAccount_prometheus-community-kube-state-metrics.yaml @@ -0,0 +1,16 @@ +--- +# Source: kube-prometheus-stack/charts/kube-state-metrics/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + helm.sh/chart: kube-state-metrics-5.16.0 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: metrics + app.kubernetes.io/part-of: kube-state-metrics + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "2.10.1" + release: prometheus-community + name: prometheus-community-kube-state-metrics + namespace: vynil-monitor \ No newline at end of file diff --git a/monitor/kube-state-metrics/v1_Service_prometheus-community-kube-state-metrics.yaml b/monitor/kube-state-metrics/v1_Service_prometheus-community-kube-state-metrics.yaml new file mode 100644 index 0000000..7ae0e79 --- /dev/null +++ b/monitor/kube-state-metrics/v1_Service_prometheus-community-kube-state-metrics.yaml @@ -0,0 +1,28 @@ +# Source: kube-prometheus-stack/charts/kube-state-metrics/templates/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: prometheus-community-kube-state-metrics + namespace: vynil-monitor + labels: + helm.sh/chart: kube-state-metrics-5.16.0 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: metrics + app.kubernetes.io/part-of: kube-state-metrics + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "2.10.1" + release: prometheus-community + annotations: + prometheus.io/scrape: 'true' +spec: + type: "ClusterIP" + ports: + - name: "http" + protocol: TCP + port: 8080 + targetPort: 8080 + + selector: + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/instance: prometheus-community \ No newline at end of file diff --git a/monitor/monitor-control-plan/datas.tf b/monitor/monitor-control-plan/datas.tf new file mode 100644 index 0000000..1edaaa6 --- /dev/null +++ b/monitor/monitor-control-plan/datas.tf @@ -0,0 +1,21 @@ +locals { + common-labels = { + "vynil.solidite.fr/owner-name" = var.instance + "vynil.solidite.fr/owner-namespace" = var.namespace + "vynil.solidite.fr/owner-category" = var.category + "vynil.solidite.fr/owner-component" = var.component + "app.kubernetes.io/managed-by" = "vynil" + "app.kubernetes.io/instance" = var.instance + } +} + +data "kustomization_overlay" "data" { + common_labels = local.common-labels + namespace = var.namespace + resources = [for file in fileset(path.module, "*.yaml"): file if file != "index.yaml" && length(regexall("v1_Service_prometheus",file))<1] +} + +data "kustomization_overlay" "data_no_ns" { + common_labels = local.common-labels + resources = [for file in fileset(path.module, "*.yaml"): file if length(regexall("v1_Service_prometheus",file))>0] +} diff --git a/monitor/monitor-control-plan/index.yaml b/monitor/monitor-control-plan/index.yaml new file mode 100644 index 0000000..572f03c --- /dev/null +++ b/monitor/monitor-control-plan/index.yaml @@ -0,0 +1,82 @@ +--- +apiVersion: vinyl.solidite.fr/v1beta1 +kind: Component +category: monitor +metadata: + name: monitor-control-plan + description: null +options: + sub-domain: + default: to-be-set + examples: + - to-be-set + type: string + issuer: + default: letsencrypt-prod + examples: + - letsencrypt-prod + type: string + domain: + default: your-company + examples: + - your-company + type: string + ingress-class: + default: traefik + examples: + - traefik + type: string + images: + default: + operator: + pullPolicy: IfNotPresent + registry: docker.io + repository: to-be/defined + tag: v1.0.0 + examples: + - operator: + pullPolicy: IfNotPresent + registry: docker.io + repository: to-be/defined + tag: v1.0.0 + properties: + operator: + default: + pullPolicy: IfNotPresent + registry: docker.io + repository: to-be/defined + tag: v1.0.0 + properties: + pullPolicy: + default: IfNotPresent + enum: + - Always + - Never + - IfNotPresent + type: string + registry: + default: docker.io + type: string + repository: + default: to-be/defined + type: string + tag: + default: v1.0.0 + type: string + type: object + type: object + domain-name: + default: your_company.com + examples: + - your_company.com + type: string +dependencies: [] +providers: + kubernetes: true + authentik: true + kubectl: true + postgresql: null + restapi: null + http: null + gitea: null +tfaddtype: null diff --git a/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-etcd.yaml b/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-etcd.yaml new file mode 100644 index 0000000..7edc70d --- /dev/null +++ b/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-etcd.yaml @@ -0,0 +1,167 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/etcd.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-etcd + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - name: etcd + rules: + - alert: etcdMembersDown + annotations: + description: 'etcd cluster "{{ $labels.job }}": members are down ({{ $value }}).' + summary: etcd cluster members are down. + expr: |- + max without (endpoint) ( + sum without (instance) (up{job=~".*etcd.*"} == bool 0) + or + count without (To) ( + sum without (instance) (rate(etcd_network_peer_sent_failures_total{job=~".*etcd.*"}[120s])) > 0.01 + ) + ) + > 0 + for: 10m + labels: + severity: critical + - alert: etcdInsufficientMembers + annotations: + description: 'etcd cluster "{{ $labels.job }}": insufficient members ({{ $value }}).' + summary: etcd cluster has insufficient number of members. + expr: sum(up{job=~".*etcd.*"} == bool 1) without (instance) < ((count(up{job=~".*etcd.*"}) without (instance) + 1) / 2) + for: 3m + labels: + severity: critical + - alert: etcdNoLeader + annotations: + description: 'etcd cluster "{{ $labels.job }}": member {{ $labels.instance }} has no leader.' + summary: etcd cluster has no leader. + expr: etcd_server_has_leader{job=~".*etcd.*"} == 0 + for: 1m + labels: + severity: critical + - alert: etcdHighNumberOfLeaderChanges + annotations: + description: 'etcd cluster "{{ $labels.job }}": {{ $value }} leader changes within the last 15 minutes. Frequent elections may be a sign of insufficient resources, high network latency, or disruptions by other components and should be investigated.' + summary: etcd cluster has high number of leader changes. + expr: increase((max without (instance) (etcd_server_leader_changes_seen_total{job=~".*etcd.*"}) or 0*absent(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}))[15m:1m]) >= 4 + for: 5m + labels: + severity: warning + - alert: etcdHighNumberOfFailedGRPCRequests + annotations: + description: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.' + summary: etcd cluster has high number of failed grpc requests. + expr: |- + 100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[5m])) without (grpc_type, grpc_code) + / + sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) without (grpc_type, grpc_code) + > 1 + for: 10m + labels: + severity: warning + - alert: etcdHighNumberOfFailedGRPCRequests + annotations: + description: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.' + summary: etcd cluster has high number of failed grpc requests. + expr: |- + 100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[5m])) without (grpc_type, grpc_code) + / + sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) without (grpc_type, grpc_code) + > 5 + for: 5m + labels: + severity: critical + - alert: etcdGRPCRequestsSlow + annotations: + description: 'etcd cluster "{{ $labels.job }}": 99th percentile of gRPC requests is {{ $value }}s on etcd instance {{ $labels.instance }} for {{ $labels.grpc_method }} method.' + summary: etcd grpc requests are slow + expr: |- + histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~".*etcd.*", grpc_method!="Defragment", grpc_type="unary"}[5m])) without(grpc_type)) + > 0.15 + for: 10m + labels: + severity: critical + - alert: etcdMemberCommunicationSlow + annotations: + description: 'etcd cluster "{{ $labels.job }}": member communication with {{ $labels.To }} is taking {{ $value }}s on etcd instance {{ $labels.instance }}.' + summary: etcd cluster member communication is slow. + expr: |- + histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~".*etcd.*"}[5m])) + > 0.15 + for: 10m + labels: + severity: warning + - alert: etcdHighNumberOfFailedProposals + annotations: + description: 'etcd cluster "{{ $labels.job }}": {{ $value }} proposal failures within the last 30 minutes on etcd instance {{ $labels.instance }}.' + summary: etcd cluster has high number of proposal failures. + expr: rate(etcd_server_proposals_failed_total{job=~".*etcd.*"}[15m]) > 5 + for: 15m + labels: + severity: warning + - alert: etcdHighFsyncDurations + annotations: + description: 'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations are {{ $value }}s on etcd instance {{ $labels.instance }}.' + summary: etcd cluster 99th percentile fsync durations are too high. + expr: |- + histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m])) + > 0.5 + for: 10m + labels: + severity: warning + - alert: etcdHighFsyncDurations + annotations: + description: 'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations are {{ $value }}s on etcd instance {{ $labels.instance }}.' + summary: etcd cluster 99th percentile fsync durations are too high. + expr: |- + histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m])) + > 1 + for: 10m + labels: + severity: critical + - alert: etcdHighCommitDurations + annotations: + description: 'etcd cluster "{{ $labels.job }}": 99th percentile commit durations {{ $value }}s on etcd instance {{ $labels.instance }}.' + summary: etcd cluster 99th percentile commit durations are too high. + expr: |- + histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~".*etcd.*"}[5m])) + > 0.25 + for: 10m + labels: + severity: warning + - alert: etcdDatabaseQuotaLowSpace + annotations: + description: 'etcd cluster "{{ $labels.job }}": database size exceeds the defined quota on etcd instance {{ $labels.instance }}, please defrag or increase the quota as the writes to etcd will be disabled when it is full.' + summary: etcd cluster database is running full. + expr: (last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m]) / last_over_time(etcd_server_quota_backend_bytes{job=~".*etcd.*"}[5m]))*100 > 95 + for: 10m + labels: + severity: critical + - alert: etcdExcessiveDatabaseGrowth + annotations: + description: 'etcd cluster "{{ $labels.job }}": Predicting running out of disk space in the next four hours, based on write observations within the past four hours on etcd instance {{ $labels.instance }}, please check as it might be disruptive.' + summary: etcd cluster database growing very fast. + expr: predict_linear(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[4h], 4*60*60) > etcd_server_quota_backend_bytes{job=~".*etcd.*"} + for: 10m + labels: + severity: warning + - alert: etcdDatabaseHighFragmentationRatio + annotations: + description: 'etcd cluster "{{ $labels.job }}": database size in use on instance {{ $labels.instance }} is {{ $value | humanizePercentage }} of the actual allocated disk space, please run defragmentation (e.g. etcdctl defrag) to retrieve the unused fragmented disk space.' + runbook_url: https://etcd.io/docs/v3.5/op-guide/maintenance/#defragmentation + summary: etcd database size in use is less than 50% of the actual allocated storage. + expr: (last_over_time(etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"}[5m]) / last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m])) < 0.5 and etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"} > 104857600 + for: 10m + labels: + severity: warning \ No newline at end of file diff --git a/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-apiserver-availability.rules.yaml b/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-apiserver-availability.rules.yaml new file mode 100644 index 0000000..77aba41 --- /dev/null +++ b/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-apiserver-availability.rules.yaml @@ -0,0 +1,129 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-availability.rules.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-kube-apiserver-availability.rules + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - interval: 3m + name: kube-apiserver-availability.rules + rules: + - expr: avg_over_time(code_verb:apiserver_request_total:increase1h[30d]) * 24 * 30 + record: code_verb:apiserver_request_total:increase30d + - expr: sum by (cluster, code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"}) + labels: + verb: read + record: code:apiserver_request_total:increase30d + - expr: sum by (cluster, code) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) + labels: + verb: write + record: code:apiserver_request_total:increase30d + - expr: sum by (cluster, verb, scope) (increase(apiserver_request_sli_duration_seconds_count{job="apiserver"}[1h])) + record: cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase1h + - expr: sum by (cluster, verb, scope) (avg_over_time(cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase1h[30d]) * 24 * 30) + record: cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d + - expr: sum by (cluster, verb, scope, le) (increase(apiserver_request_sli_duration_seconds_bucket[1h])) + record: cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase1h + - expr: sum by (cluster, verb, scope, le) (avg_over_time(cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase1h[30d]) * 24 * 30) + record: cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d + - expr: |- + 1 - ( + ( + # write too slow + sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) + - + sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"POST|PUT|PATCH|DELETE",le="1"}) + ) + + ( + # read too slow + sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"LIST|GET"}) + - + ( + ( + sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope=~"resource|",le="1"}) + or + vector(0) + ) + + + sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="namespace",le="5"}) + + + sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="cluster",le="30"}) + ) + ) + + # errors + sum by (cluster) (code:apiserver_request_total:increase30d{code=~"5.."} or vector(0)) + ) + / + sum by (cluster) (code:apiserver_request_total:increase30d) + labels: + verb: all + record: apiserver_request:availability30d + - expr: |- + 1 - ( + sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"LIST|GET"}) + - + ( + # too slow + ( + sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope=~"resource|",le="1"}) + or + vector(0) + ) + + + sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="namespace",le="5"}) + + + sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="cluster",le="30"}) + ) + + + # errors + sum by (cluster) (code:apiserver_request_total:increase30d{verb="read",code=~"5.."} or vector(0)) + ) + / + sum by (cluster) (code:apiserver_request_total:increase30d{verb="read"}) + labels: + verb: read + record: apiserver_request:availability30d + - expr: |- + 1 - ( + ( + # too slow + sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) + - + sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"POST|PUT|PATCH|DELETE",le="1"}) + ) + + + # errors + sum by (cluster) (code:apiserver_request_total:increase30d{verb="write",code=~"5.."} or vector(0)) + ) + / + sum by (cluster) (code:apiserver_request_total:increase30d{verb="write"}) + labels: + verb: write + record: apiserver_request:availability30d + - expr: sum by (cluster,code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m])) + labels: + verb: read + record: code_resource:apiserver_request_total:rate5m + - expr: sum by (cluster,code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) + labels: + verb: write + record: code_resource:apiserver_request_total:rate5m + - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"2.."}[1h])) + record: code_verb:apiserver_request_total:increase1h + - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"3.."}[1h])) + record: code_verb:apiserver_request_total:increase1h + - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"4.."}[1h])) + record: code_verb:apiserver_request_total:increase1h + - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"5.."}[1h])) + record: code_verb:apiserver_request_total:increase1h \ No newline at end of file diff --git a/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-apiserver-burnrate.rules.yaml b/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-apiserver-burnrate.rules.yaml new file mode 100644 index 0000000..56041ca --- /dev/null +++ b/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-apiserver-burnrate.rules.yaml @@ -0,0 +1,321 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-burnrate.rules.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-kube-apiserver-burnrate.rules + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - name: kube-apiserver-burnrate.rules + rules: + - expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[1d])) + - + ( + ( + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[1d])) + or + vector(0) + ) + + + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[1d])) + + + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[1d])) + ) + ) + + + # errors + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1d])) + ) + / + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1d])) + labels: + verb: read + record: apiserver_request:burnrate1d + - expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[1h])) + - + ( + ( + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[1h])) + or + vector(0) + ) + + + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[1h])) + + + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[1h])) + ) + ) + + + # errors + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1h])) + ) + / + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1h])) + labels: + verb: read + record: apiserver_request:burnrate1h + - expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[2h])) + - + ( + ( + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[2h])) + or + vector(0) + ) + + + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[2h])) + + + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[2h])) + ) + ) + + + # errors + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[2h])) + ) + / + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[2h])) + labels: + verb: read + record: apiserver_request:burnrate2h + - expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[30m])) + - + ( + ( + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[30m])) + or + vector(0) + ) + + + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[30m])) + + + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[30m])) + ) + ) + + + # errors + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[30m])) + ) + / + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[30m])) + labels: + verb: read + record: apiserver_request:burnrate30m + - expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[3d])) + - + ( + ( + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[3d])) + or + vector(0) + ) + + + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[3d])) + + + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[3d])) + ) + ) + + + # errors + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[3d])) + ) + / + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[3d])) + labels: + verb: read + record: apiserver_request:burnrate3d + - expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[5m])) + - + ( + ( + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[5m])) + or + vector(0) + ) + + + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[5m])) + + + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[5m])) + ) + ) + + + # errors + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[5m])) + ) + / + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m])) + labels: + verb: read + record: apiserver_request:burnrate5m + - expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[6h])) + - + ( + ( + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[6h])) + or + vector(0) + ) + + + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[6h])) + + + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[6h])) + ) + ) + + + # errors + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[6h])) + ) + / + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[6h])) + labels: + verb: read + record: apiserver_request:burnrate6h + - expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[1d])) + - + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[1d])) + ) + + + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1d])) + ) + / + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d])) + labels: + verb: write + record: apiserver_request:burnrate1d + - expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[1h])) + - + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[1h])) + ) + + + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1h])) + ) + / + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h])) + labels: + verb: write + record: apiserver_request:burnrate1h + - expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[2h])) + - + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[2h])) + ) + + + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[2h])) + ) + / + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h])) + labels: + verb: write + record: apiserver_request:burnrate2h + - expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[30m])) + - + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[30m])) + ) + + + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[30m])) + ) + / + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m])) + labels: + verb: write + record: apiserver_request:burnrate30m + - expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[3d])) + - + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[3d])) + ) + + + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[3d])) + ) + / + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d])) + labels: + verb: write + record: apiserver_request:burnrate3d + - expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[5m])) + - + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[5m])) + ) + + + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[5m])) + ) + / + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) + labels: + verb: write + record: apiserver_request:burnrate5m + - expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[6h])) + - + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[6h])) + ) + + + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[6h])) + ) + / + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h])) + labels: + verb: write + record: apiserver_request:burnrate6h \ No newline at end of file diff --git a/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-apiserver-histogram.rules.yaml b/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-apiserver-histogram.rules.yaml new file mode 100644 index 0000000..01fa2b2 --- /dev/null +++ b/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-apiserver-histogram.rules.yaml @@ -0,0 +1,30 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-histogram.rules.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-kube-apiserver-histogram.rules + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - name: kube-apiserver-histogram.rules + rules: + - expr: histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[5m]))) > 0 + labels: + quantile: '0.99' + verb: read + record: cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile + - expr: histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[5m]))) > 0 + labels: + quantile: '0.99' + verb: write + record: cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile \ No newline at end of file diff --git a/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-apiserver-slos.yaml b/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-apiserver-slos.yaml new file mode 100644 index 0000000..c04101d --- /dev/null +++ b/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-apiserver-slos.yaml @@ -0,0 +1,76 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-slos.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-kube-apiserver-slos + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - name: kube-apiserver-slos + rules: + - alert: KubeAPIErrorBudgetBurn + annotations: + description: The API server is burning too much error budget. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn + summary: The API server is burning too much error budget. + expr: |- + sum(apiserver_request:burnrate1h) > (14.40 * 0.01000) + and + sum(apiserver_request:burnrate5m) > (14.40 * 0.01000) + for: 2m + labels: + long: 1h + severity: critical + short: 5m + - alert: KubeAPIErrorBudgetBurn + annotations: + description: The API server is burning too much error budget. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn + summary: The API server is burning too much error budget. + expr: |- + sum(apiserver_request:burnrate6h) > (6.00 * 0.01000) + and + sum(apiserver_request:burnrate30m) > (6.00 * 0.01000) + for: 15m + labels: + long: 6h + severity: critical + short: 30m + - alert: KubeAPIErrorBudgetBurn + annotations: + description: The API server is burning too much error budget. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn + summary: The API server is burning too much error budget. + expr: |- + sum(apiserver_request:burnrate1d) > (3.00 * 0.01000) + and + sum(apiserver_request:burnrate2h) > (3.00 * 0.01000) + for: 1h + labels: + long: 1d + severity: warning + short: 2h + - alert: KubeAPIErrorBudgetBurn + annotations: + description: The API server is burning too much error budget. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn + summary: The API server is burning too much error budget. + expr: |- + sum(apiserver_request:burnrate3d) > (1.00 * 0.01000) + and + sum(apiserver_request:burnrate6h) > (1.00 * 0.01000) + for: 3h + labels: + long: 3d + severity: warning + short: 6h \ No newline at end of file diff --git a/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-scheduler.rules.yaml b/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-scheduler.rules.yaml new file mode 100644 index 0000000..21b6339 --- /dev/null +++ b/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-scheduler.rules.yaml @@ -0,0 +1,56 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kube-scheduler.rules.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-kube-scheduler.rules + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - name: kube-scheduler.rules + rules: + - expr: histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) + labels: + quantile: '0.99' + record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile + - expr: histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) + labels: + quantile: '0.99' + record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile + - expr: histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) + labels: + quantile: '0.99' + record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile + - expr: histogram_quantile(0.9, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) + labels: + quantile: '0.9' + record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile + - expr: histogram_quantile(0.9, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) + labels: + quantile: '0.9' + record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile + - expr: histogram_quantile(0.9, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) + labels: + quantile: '0.9' + record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile + - expr: histogram_quantile(0.5, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) + labels: + quantile: '0.5' + record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile + - expr: histogram_quantile(0.5, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) + labels: + quantile: '0.5' + record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile + - expr: histogram_quantile(0.5, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) + labels: + quantile: '0.5' + record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile \ No newline at end of file diff --git a/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-controller-manager.yaml b/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-controller-manager.yaml new file mode 100644 index 0000000..425a8f3 --- /dev/null +++ b/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-controller-manager.yaml @@ -0,0 +1,29 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-controller-manager.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-kubernetes-system-controller-manager + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - name: kubernetes-system-controller-manager + rules: + - alert: KubeControllerManagerDown + annotations: + description: KubeControllerManager has disappeared from Prometheus target discovery. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontrollermanagerdown + summary: Target disappeared from Prometheus target discovery. + expr: absent(up{job="kube-controller-manager"} == 1) + for: 15m + labels: + severity: critical \ No newline at end of file diff --git a/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-scheduler.yaml b/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-scheduler.yaml new file mode 100644 index 0000000..08583f2 --- /dev/null +++ b/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-scheduler.yaml @@ -0,0 +1,29 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-scheduler.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-kubernetes-system-scheduler + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - name: kubernetes-system-scheduler + rules: + - alert: KubeSchedulerDown + annotations: + description: KubeScheduler has disappeared from Prometheus target discovery. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeschedulerdown + summary: Target disappeared from Prometheus target discovery. + expr: absent(up{job="kube-scheduler"} == 1) + for: 15m + labels: + severity: critical \ No newline at end of file diff --git a/monitor/monitor-control-plan/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-apiserver.yaml b/monitor/monitor-control-plan/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-apiserver.yaml new file mode 100644 index 0000000..9539a63 --- /dev/null +++ b/monitor/monitor-control-plan/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-apiserver.yaml @@ -0,0 +1,40 @@ +# Source: kube-prometheus-stack/templates/exporters/kube-api-server/servicemonitor.yaml +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: prometheus-community-kube-apiserver + namespace: vynil-monitor + labels: + app: kube-prometheus-stack-apiserver + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + port: https + scheme: https + metricRelabelings: + - action: drop + regex: apiserver_request_duration_seconds_bucket;(0.15|0.2|0.3|0.35|0.4|0.45|0.6|0.7|0.8|0.9|1.25|1.5|1.75|2|3|3.5|4|4.5|6|7|8|9|15|25|40|50) + sourceLabels: + - __name__ + - le + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + serverName: kubernetes + insecureSkipVerify: false + jobLabel: component + namespaceSelector: + matchNames: + - default + selector: + matchLabels: + component: apiserver + provider: kubernetes \ No newline at end of file diff --git a/monitor/monitor-control-plan/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-kube-controller-manager.yaml b/monitor/monitor-control-plan/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-kube-controller-manager.yaml new file mode 100644 index 0000000..eeafe1a --- /dev/null +++ b/monitor/monitor-control-plan/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-kube-controller-manager.yaml @@ -0,0 +1,33 @@ +# Source: kube-prometheus-stack/templates/exporters/kube-controller-manager/servicemonitor.yaml +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: prometheus-community-kube-kube-controller-manager + namespace: vynil-monitor + labels: + app: kube-prometheus-stack-kube-controller-manager + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + jobLabel: jobLabel + + selector: + matchLabels: + app: kube-prometheus-stack-kube-controller-manager + release: "prometheus-community" + namespaceSelector: + matchNames: + - "kube-system" + endpoints: + - port: http-metrics + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + scheme: https + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecureSkipVerify: true \ No newline at end of file diff --git a/monitor/monitor-control-plan/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-kube-etcd.yaml b/monitor/monitor-control-plan/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-kube-etcd.yaml new file mode 100644 index 0000000..c23eae7 --- /dev/null +++ b/monitor/monitor-control-plan/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-kube-etcd.yaml @@ -0,0 +1,29 @@ +# Source: kube-prometheus-stack/templates/exporters/kube-etcd/servicemonitor.yaml +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: prometheus-community-kube-kube-etcd + namespace: vynil-monitor + labels: + app: kube-prometheus-stack-kube-etcd + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + jobLabel: jobLabel + + selector: + matchLabels: + app: kube-prometheus-stack-kube-etcd + release: "prometheus-community" + namespaceSelector: + matchNames: + - "kube-system" + endpoints: + - port: http-metrics + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token \ No newline at end of file diff --git a/monitor/monitor-control-plan/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-kube-scheduler.yaml b/monitor/monitor-control-plan/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-kube-scheduler.yaml new file mode 100644 index 0000000..228304e --- /dev/null +++ b/monitor/monitor-control-plan/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-kube-scheduler.yaml @@ -0,0 +1,33 @@ +# Source: kube-prometheus-stack/templates/exporters/kube-scheduler/servicemonitor.yaml +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: prometheus-community-kube-kube-scheduler + namespace: vynil-monitor + labels: + app: kube-prometheus-stack-kube-scheduler + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + jobLabel: jobLabel + + selector: + matchLabels: + app: kube-prometheus-stack-kube-scheduler + release: "prometheus-community" + namespaceSelector: + matchNames: + - "kube-system" + endpoints: + - port: http-metrics + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + scheme: https + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecureSkipVerify: true \ No newline at end of file diff --git a/monitor/monitor-control-plan/ressources_no_ns.tf b/monitor/monitor-control-plan/ressources_no_ns.tf new file mode 100644 index 0000000..9fa58b7 --- /dev/null +++ b/monitor/monitor-control-plan/ressources_no_ns.tf @@ -0,0 +1,45 @@ + +# first loop through resources in ids_prio[0] +resource "kustomization_resource" "pre_no_ns" { + for_each = data.kustomization_overlay.data_no_ns.ids_prio[0] + + manifest = ( + contains(["_/Secret"], regex("(?P.*/.*)/.*/.*", each.value)["group_kind"]) + ? sensitive(data.kustomization_overlay.data_no_ns.manifests[each.value]) + : data.kustomization_overlay.data_no_ns.manifests[each.value] + ) +} + +# then loop through resources in ids_prio[1] +# and set an explicit depends_on on kustomization_resource.pre +# wait 2 minutes for any deployment or daemonset to become ready +resource "kustomization_resource" "main_no_ns" { + for_each = data.kustomization_overlay.data_no_ns.ids_prio[1] + + manifest = ( + contains(["_/Secret"], regex("(?P.*/.*)/.*/.*", each.value)["group_kind"]) + ? sensitive(data.kustomization_overlay.data_no_ns.manifests[each.value]) + : data.kustomization_overlay.data_no_ns.manifests[each.value] + ) + wait = true + timeouts { + create = "5m" + update = "5m" + } + + depends_on = [kustomization_resource.pre_no_ns] +} + +# finally, loop through resources in ids_prio[2] +# and set an explicit depends_on on kustomization_resource.main +resource "kustomization_resource" "post_no_ns" { + for_each = data.kustomization_overlay.data_no_ns.ids_prio[2] + + manifest = ( + contains(["_/Secret"], regex("(?P.*/.*)/.*/.*", each.value)["group_kind"]) + ? sensitive(data.kustomization_overlay.data_no_ns.manifests[each.value]) + : data.kustomization_overlay.data_no_ns.manifests[each.value] + ) + + depends_on = [kustomization_resource.main_no_ns] +} diff --git a/monitor/monitor-control-plan/v1_Service_prometheus-community-kube-kube-controller-manager.yaml b/monitor/monitor-control-plan/v1_Service_prometheus-community-kube-kube-controller-manager.yaml new file mode 100644 index 0000000..760c393 --- /dev/null +++ b/monitor/monitor-control-plan/v1_Service_prometheus-community-kube-kube-controller-manager.yaml @@ -0,0 +1,28 @@ +--- +# Source: kube-prometheus-stack/templates/exporters/kube-controller-manager/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: prometheus-community-kube-kube-controller-manager + labels: + app: kube-prometheus-stack-kube-controller-manager + jobLabel: kube-controller-manager + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" + namespace: kube-system +spec: + clusterIP: None + ports: + - name: http-metrics + port: 10257 + protocol: TCP + targetPort: 10257 + selector: + component: kube-controller-manager + type: ClusterIP \ No newline at end of file diff --git a/monitor/monitor-control-plan/v1_Service_prometheus-community-kube-kube-etcd.yaml b/monitor/monitor-control-plan/v1_Service_prometheus-community-kube-kube-etcd.yaml new file mode 100644 index 0000000..fd9be71 --- /dev/null +++ b/monitor/monitor-control-plan/v1_Service_prometheus-community-kube-kube-etcd.yaml @@ -0,0 +1,27 @@ +# Source: kube-prometheus-stack/templates/exporters/kube-etcd/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: prometheus-community-kube-kube-etcd + labels: + app: kube-prometheus-stack-kube-etcd + jobLabel: kube-etcd + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" + namespace: kube-system +spec: + clusterIP: None + ports: + - name: http-metrics + port: 2381 + protocol: TCP + targetPort: 2381 + selector: + component: etcd + type: ClusterIP \ No newline at end of file diff --git a/monitor/monitor-control-plan/v1_Service_prometheus-community-kube-kube-scheduler.yaml b/monitor/monitor-control-plan/v1_Service_prometheus-community-kube-kube-scheduler.yaml new file mode 100644 index 0000000..7fbca4c --- /dev/null +++ b/monitor/monitor-control-plan/v1_Service_prometheus-community-kube-kube-scheduler.yaml @@ -0,0 +1,27 @@ +# Source: kube-prometheus-stack/templates/exporters/kube-scheduler/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: prometheus-community-kube-kube-scheduler + labels: + app: kube-prometheus-stack-kube-scheduler + jobLabel: kube-scheduler + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" + namespace: kube-system +spec: + clusterIP: None + ports: + - name: http-metrics + port: 10259 + protocol: TCP + targetPort: 10259 + selector: + component: kube-scheduler + type: ClusterIP \ No newline at end of file diff --git a/monitor/node-exporter/apps_v1_DaemonSet_prometheus-community-prometheus-node-exporter.yaml b/monitor/node-exporter/apps_v1_DaemonSet_prometheus-community-prometheus-node-exporter.yaml new file mode 100644 index 0000000..b0ab711 --- /dev/null +++ b/monitor/node-exporter/apps_v1_DaemonSet_prometheus-community-prometheus-node-exporter.yaml @@ -0,0 +1,119 @@ +# Source: kube-prometheus-stack/charts/prometheus-node-exporter/templates/daemonset.yaml +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: prometheus-community-prometheus-node-exporter + namespace: vynil-monitor + labels: + helm.sh/chart: prometheus-node-exporter-4.25.0 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: metrics + app.kubernetes.io/part-of: prometheus-node-exporter + app.kubernetes.io/name: prometheus-node-exporter + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "1.7.0" + jobLabel: node-exporter + release: prometheus-community +spec: + selector: + matchLabels: + app.kubernetes.io/name: prometheus-node-exporter + app.kubernetes.io/instance: prometheus-community + revisionHistoryLimit: 10 + updateStrategy: + rollingUpdate: + maxUnavailable: 1 + type: RollingUpdate + template: + metadata: + annotations: + cluster-autoscaler.kubernetes.io/safe-to-evict: "true" + labels: + helm.sh/chart: prometheus-node-exporter-4.25.0 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: metrics + app.kubernetes.io/part-of: prometheus-node-exporter + app.kubernetes.io/name: prometheus-node-exporter + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "1.7.0" + jobLabel: node-exporter + release: prometheus-community + spec: + automountServiceAccountToken: false + securityContext: + fsGroup: 65534 + runAsGroup: 65534 + runAsNonRoot: true + runAsUser: 65534 + serviceAccountName: prometheus-community-prometheus-node-exporter + containers: + - name: node-exporter + image: quay.io/prometheus/node-exporter:v1.7.0 + imagePullPolicy: IfNotPresent + args: + - --path.procfs=/host/proc + - --path.sysfs=/host/sys + - --path.rootfs=/host/root + - --path.udev.data=/host/root/run/udev/data + - --web.listen-address=[$(HOST_IP)]:9100 + - --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/) + - --collector.filesystem.fs-types-exclude=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$ + securityContext: + readOnlyRootFilesystem: true + env: + - name: HOST_IP + value: 0.0.0.0 + ports: + - name: http-metrics + containerPort: 9100 + protocol: TCP + livenessProbe: + failureThreshold: 3 + httpGet: + httpHeaders: + path: / + port: 9100 + scheme: HTTP + initialDelaySeconds: 0 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + readinessProbe: + failureThreshold: 3 + httpGet: + httpHeaders: + path: / + port: 9100 + scheme: HTTP + initialDelaySeconds: 0 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + volumeMounts: + - name: proc + mountPath: /host/proc + readOnly: true + - name: sys + mountPath: /host/sys + readOnly: true + - name: root + mountPath: /host/root + mountPropagation: HostToContainer + readOnly: true + hostNetwork: true + hostPID: true + nodeSelector: + kubernetes.io/os: linux + tolerations: + - effect: NoSchedule + operator: Exists + volumes: + - name: proc + hostPath: + path: /proc + - name: sys + hostPath: + path: /sys + - name: root + hostPath: + path: / \ No newline at end of file diff --git a/monitor/node-exporter/datas.tf b/monitor/node-exporter/datas.tf new file mode 100644 index 0000000..422f06f --- /dev/null +++ b/monitor/node-exporter/datas.tf @@ -0,0 +1,21 @@ +locals { + common-labels = { + "vynil.solidite.fr/owner-name" = var.instance + "vynil.solidite.fr/owner-namespace" = var.namespace + "vynil.solidite.fr/owner-category" = var.category + "vynil.solidite.fr/owner-component" = var.component + "app.kubernetes.io/managed-by" = "vynil" + "app.kubernetes.io/instance" = var.instance + } +} + +data "kustomization_overlay" "data" { + common_labels = local.common-labels + namespace = var.namespace + resources = [for file in fileset(path.module, "*.yaml"): file if file != "index.yaml"] + images { + name = "quay.io/prometheus/node-exporter" + new_name = "${var.images.node-exporter.registry}/${var.images.node-exporter.repository}" + new_tag = "${var.images.node-exporter.tag}" + } +} diff --git a/monitor/node-exporter/index.yaml b/monitor/node-exporter/index.yaml new file mode 100644 index 0000000..2b6f7fe --- /dev/null +++ b/monitor/node-exporter/index.yaml @@ -0,0 +1,57 @@ +--- +apiVersion: vinyl.solidite.fr/v1beta1 +kind: Component +category: monitor +metadata: + name: node-exporter + description: null +options: + images: + default: + node-exporter: + pullPolicy: IfNotPresent + registry: quay.io + repository: prometheus/node-exporter + tag: v1.7.0 + examples: + - node-exporter: + pullPolicy: IfNotPresent + registry: quay.io + repository: prometheus/node-exporter + tag: v1.7.0 + properties: + node-exporter: + default: + pullPolicy: IfNotPresent + registry: quay.io + repository: prometheus/node-exporter + tag: v1.7.0 + properties: + pullPolicy: + default: IfNotPresent + enum: + - Always + - Never + - IfNotPresent + type: string + registry: + default: quay.io + type: string + repository: + default: prometheus/node-exporter + type: string + tag: + default: v1.7.0 + type: string + type: object + type: object +dependencies: [] +providers: + kubernetes: true + authentik: null + kubectl: true + postgresql: null + restapi: null + http: null + gitea: null +tfaddtype: null diff --git a/monitor/node-exporter/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-node-exporter.rules.yaml b/monitor/node-exporter/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-node-exporter.rules.yaml new file mode 100644 index 0000000..c2ab2d0 --- /dev/null +++ b/monitor/node-exporter/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-node-exporter.rules.yaml @@ -0,0 +1,82 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.rules.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-node-exporter.rules + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - name: node-exporter.rules + rules: + - expr: |- + count without (cpu, mode) ( + node_cpu_seconds_total{job="node-exporter",mode="idle"} + ) + record: instance:node_num_cpu:sum + - expr: |- + 1 - avg without (cpu) ( + sum without (mode) (rate(node_cpu_seconds_total{job="node-exporter", mode=~"idle|iowait|steal"}[5m])) + ) + record: instance:node_cpu_utilisation:rate5m + - expr: |- + ( + node_load1{job="node-exporter"} + / + instance:node_num_cpu:sum{job="node-exporter"} + ) + record: instance:node_load1_per_cpu:ratio + - expr: |- + 1 - ( + ( + node_memory_MemAvailable_bytes{job="node-exporter"} + or + ( + node_memory_Buffers_bytes{job="node-exporter"} + + + node_memory_Cached_bytes{job="node-exporter"} + + + node_memory_MemFree_bytes{job="node-exporter"} + + + node_memory_Slab_bytes{job="node-exporter"} + ) + ) + / + node_memory_MemTotal_bytes{job="node-exporter"} + ) + record: instance:node_memory_utilisation:ratio + - expr: rate(node_vmstat_pgmajfault{job="node-exporter"}[5m]) + record: instance:node_vmstat_pgmajfault:rate5m + - expr: rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m]) + record: instance_device:node_disk_io_time_seconds:rate5m + - expr: rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m]) + record: instance_device:node_disk_io_time_weighted_seconds:rate5m + - expr: |- + sum without (device) ( + rate(node_network_receive_bytes_total{job="node-exporter", device!="lo"}[5m]) + ) + record: instance:node_network_receive_bytes_excluding_lo:rate5m + - expr: |- + sum without (device) ( + rate(node_network_transmit_bytes_total{job="node-exporter", device!="lo"}[5m]) + ) + record: instance:node_network_transmit_bytes_excluding_lo:rate5m + - expr: |- + sum without (device) ( + rate(node_network_receive_drop_total{job="node-exporter", device!="lo"}[5m]) + ) + record: instance:node_network_receive_drop_excluding_lo:rate5m + - expr: |- + sum without (device) ( + rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[5m]) + ) + record: instance:node_network_transmit_drop_excluding_lo:rate5m \ No newline at end of file diff --git a/monitor/node-exporter/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-node-exporter.yaml b/monitor/node-exporter/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-node-exporter.yaml new file mode 100644 index 0000000..fcb6a7b --- /dev/null +++ b/monitor/node-exporter/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-node-exporter.yaml @@ -0,0 +1,328 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-node-exporter + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - name: node-exporter + rules: + - alert: NodeFilesystemSpaceFillingUp + annotations: + description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup + summary: Filesystem is predicted to run out of space within the next 24 hours. + expr: |- + ( + node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 15 + and + predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""}[6h], 24*60*60) < 0 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: 1h + labels: + severity: warning + - alert: NodeFilesystemSpaceFillingUp + annotations: + description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up fast. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup + summary: Filesystem is predicted to run out of space within the next 4 hours. + expr: |- + ( + node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 10 + and + predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""}[6h], 4*60*60) < 0 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: 1h + labels: + severity: critical + - alert: NodeFilesystemAlmostOutOfSpace + annotations: + description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace + summary: Filesystem has less than 5% space left. + expr: |- + ( + node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 5 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: 30m + labels: + severity: warning + - alert: NodeFilesystemAlmostOutOfSpace + annotations: + description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace + summary: Filesystem has less than 3% space left. + expr: |- + ( + node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 3 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: 30m + labels: + severity: critical + - alert: NodeFilesystemFilesFillingUp + annotations: + description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup + summary: Filesystem is predicted to run out of inodes within the next 24 hours. + expr: |- + ( + node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 40 + and + predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""}[6h], 24*60*60) < 0 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: 1h + labels: + severity: warning + - alert: NodeFilesystemFilesFillingUp + annotations: + description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up fast. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup + summary: Filesystem is predicted to run out of inodes within the next 4 hours. + expr: |- + ( + node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 20 + and + predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""}[6h], 4*60*60) < 0 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: 1h + labels: + severity: critical + - alert: NodeFilesystemAlmostOutOfFiles + annotations: + description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles + summary: Filesystem has less than 5% inodes left. + expr: |- + ( + node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 5 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: 1h + labels: + severity: warning + - alert: NodeFilesystemAlmostOutOfFiles + annotations: + description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles + summary: Filesystem has less than 3% inodes left. + expr: |- + ( + node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 3 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: 1h + labels: + severity: critical + - alert: NodeNetworkReceiveErrs + annotations: + description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} receive errors in the last two minutes.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworkreceiveerrs + summary: Network interface is reporting many receive errors. + expr: rate(node_network_receive_errs_total{job="node-exporter"}[2m]) / rate(node_network_receive_packets_total{job="node-exporter"}[2m]) > 0.01 + for: 1h + labels: + severity: warning + - alert: NodeNetworkTransmitErrs + annotations: + description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} transmit errors in the last two minutes.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworktransmiterrs + summary: Network interface is reporting many transmit errors. + expr: rate(node_network_transmit_errs_total{job="node-exporter"}[2m]) / rate(node_network_transmit_packets_total{job="node-exporter"}[2m]) > 0.01 + for: 1h + labels: + severity: warning + - alert: NodeHighNumberConntrackEntriesUsed + annotations: + description: '{{ $value | humanizePercentage }} of conntrack entries are used.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodehighnumberconntrackentriesused + summary: Number of conntrack are getting close to the limit. + expr: (node_nf_conntrack_entries{job="node-exporter"} / node_nf_conntrack_entries_limit) > 0.75 + labels: + severity: warning + - alert: NodeTextFileCollectorScrapeError + annotations: + description: Node Exporter text file collector on {{ $labels.instance }} failed to scrape. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodetextfilecollectorscrapeerror + summary: Node Exporter text file collector failed to scrape. + expr: node_textfile_scrape_error{job="node-exporter"} == 1 + labels: + severity: warning + - alert: NodeClockSkewDetected + annotations: + description: Clock at {{ $labels.instance }} is out of sync by more than 0.05s. Ensure NTP is configured correctly on this host. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclockskewdetected + summary: Clock skew detected. + expr: |- + ( + node_timex_offset_seconds{job="node-exporter"} > 0.05 + and + deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) >= 0 + ) + or + ( + node_timex_offset_seconds{job="node-exporter"} < -0.05 + and + deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) <= 0 + ) + for: 10m + labels: + severity: warning + - alert: NodeClockNotSynchronising + annotations: + description: Clock at {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclocknotsynchronising + summary: Clock not synchronising. + expr: |- + min_over_time(node_timex_sync_status{job="node-exporter"}[5m]) == 0 + and + node_timex_maxerror_seconds{job="node-exporter"} >= 16 + for: 10m + labels: + severity: warning + - alert: NodeRAIDDegraded + annotations: + description: RAID array '{{ $labels.device }}' at {{ $labels.instance }} is in degraded state due to one or more disks failures. Number of spare drives is insufficient to fix issue automatically. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddegraded + summary: RAID Array is degraded. + expr: node_md_disks_required{job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"} - ignoring (state) (node_md_disks{state="active",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}) > 0 + for: 15m + labels: + severity: critical + - alert: NodeRAIDDiskFailure + annotations: + description: At least one device in RAID array at {{ $labels.instance }} failed. Array '{{ $labels.device }}' needs attention and possibly a disk swap. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddiskfailure + summary: Failed device in RAID array. + expr: node_md_disks{state="failed",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"} > 0 + labels: + severity: warning + - alert: NodeFileDescriptorLimit + annotations: + description: File descriptors limit at {{ $labels.instance }} is currently at {{ printf "%.2f" $value }}%. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit + summary: Kernel is predicted to exhaust file descriptors limit soon. + expr: |- + ( + node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 70 + ) + for: 15m + labels: + severity: warning + - alert: NodeFileDescriptorLimit + annotations: + description: File descriptors limit at {{ $labels.instance }} is currently at {{ printf "%.2f" $value }}%. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit + summary: Kernel is predicted to exhaust file descriptors limit soon. + expr: |- + ( + node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 90 + ) + for: 15m + labels: + severity: critical + - alert: NodeCPUHighUsage + annotations: + description: 'CPU usage at {{ $labels.instance }} has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%. + + ' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodecpuhighusage + summary: High CPU usage. + expr: sum without(mode) (avg without (cpu) (rate(node_cpu_seconds_total{job="node-exporter", mode!="idle"}[2m]))) * 100 > 90 + for: 15m + labels: + severity: info + - alert: NodeSystemSaturation + annotations: + description: 'System load per core at {{ $labels.instance }} has been above 2 for the last 15 minutes, is currently at {{ printf "%.2f" $value }}. + + This might indicate this instance resources saturation and can cause it becoming unresponsive. + + ' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodesystemsaturation + summary: System saturated, load per core is very high. + expr: |- + node_load1{job="node-exporter"} + / count without (cpu, mode) (node_cpu_seconds_total{job="node-exporter", mode="idle"}) > 2 + for: 15m + labels: + severity: warning + - alert: NodeMemoryMajorPagesFaults + annotations: + description: 'Memory major pages are occurring at very high rate at {{ $labels.instance }}, 500 major page faults per second for the last 15 minutes, is currently at {{ printf "%.2f" $value }}. + + Please check that there is enough memory available at this instance. + + ' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodememorymajorpagesfaults + summary: Memory major page faults are occurring at very high rate. + expr: rate(node_vmstat_pgmajfault{job="node-exporter"}[5m]) > 500 + for: 15m + labels: + severity: warning + - alert: NodeMemoryHighUtilization + annotations: + description: 'Memory is filling up at {{ $labels.instance }}, has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%. + + ' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodememoryhighutilization + summary: Host is running out of memory. + expr: 100 - (node_memory_MemAvailable_bytes{job="node-exporter"} / node_memory_MemTotal_bytes{job="node-exporter"} * 100) > 90 + for: 15m + labels: + severity: warning + - alert: NodeDiskIOSaturation + annotations: + description: 'Disk IO queue (aqu-sq) is high on {{ $labels.device }} at {{ $labels.instance }}, has been above 10 for the last 15 minutes, is currently at {{ printf "%.2f" $value }}. + + This symptom might indicate disk saturation. + + ' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodediskiosaturation + summary: Disk IO queue is high. + expr: rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m]) > 10 + for: 30m + labels: + severity: warning + - alert: NodeSystemdServiceFailed + annotations: + description: Systemd service {{ $labels.name }} has entered failed state at {{ $labels.instance }} + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodesystemdservicefailed + summary: Systemd service has entered failed state. + expr: node_systemd_unit_state{job="node-exporter", state="failed"} == 1 + for: 5m + labels: + severity: warning + - alert: NodeBondingDegraded + annotations: + description: Bonding interface {{ $labels.master }} on {{ $labels.instance }} is in degraded state due to one or more slave failures. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodebondingdegraded + summary: Bonding interface is degraded + expr: (node_bonding_slaves - node_bonding_active) != 0 + for: 5m + labels: + severity: warning \ No newline at end of file diff --git a/monitor/node-exporter/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-node-network.yaml b/monitor/node-exporter/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-node-network.yaml new file mode 100644 index 0000000..4e71d74 --- /dev/null +++ b/monitor/node-exporter/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-node-network.yaml @@ -0,0 +1,29 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/node-network.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-node-network + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - name: node-network + rules: + - alert: NodeNetworkInterfaceFlapping + annotations: + description: Network interface "{{ $labels.device }}" changing its up status often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }} + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/nodenetworkinterfaceflapping + summary: Network interface is often changing its status + expr: changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2 + for: 2m + labels: + severity: warning \ No newline at end of file diff --git a/monitor/node-exporter/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-node.rules.yaml b/monitor/node-exporter/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-node.rules.yaml new file mode 100644 index 0000000..a10a0a2 --- /dev/null +++ b/monitor/node-exporter/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-node.rules.yaml @@ -0,0 +1,56 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/node.rules.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-node.rules + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - name: node.rules + rules: + - expr: |- + topk by (cluster, namespace, pod) (1, + max by (cluster, node, namespace, pod) ( + label_replace(kube_pod_info{job="kube-state-metrics",node!=""}, "pod", "$1", "pod", "(.*)") + )) + record: 'node_namespace_pod:kube_pod_info:' + - expr: |- + count by (cluster, node) ( + node_cpu_seconds_total{mode="idle",job="node-exporter"} + * on (cluster, namespace, pod) group_left(node) + topk by (cluster, namespace, pod) (1, node_namespace_pod:kube_pod_info:) + ) + record: node:node_num_cpu:sum + - expr: |- + sum( + node_memory_MemAvailable_bytes{job="node-exporter"} or + ( + node_memory_Buffers_bytes{job="node-exporter"} + + node_memory_Cached_bytes{job="node-exporter"} + + node_memory_MemFree_bytes{job="node-exporter"} + + node_memory_Slab_bytes{job="node-exporter"} + ) + ) by (cluster) + record: :node_memory_MemAvailable_bytes:sum + - expr: |- + avg by (cluster, node) ( + sum without (mode) ( + rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal",job="node-exporter"}[5m]) + ) + ) + record: node:node_cpu_utilization:ratio_rate5m + - expr: |- + avg by (cluster) ( + node:node_cpu_utilization:ratio_rate5m + ) + record: cluster:node_cpu:ratio_rate5m \ No newline at end of file diff --git a/monitor/node-exporter/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-prometheus-node-exporter.yaml b/monitor/node-exporter/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-prometheus-node-exporter.yaml new file mode 100644 index 0000000..94a9810 --- /dev/null +++ b/monitor/node-exporter/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-prometheus-node-exporter.yaml @@ -0,0 +1,28 @@ +# Source: kube-prometheus-stack/charts/prometheus-node-exporter/templates/servicemonitor.yaml +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: prometheus-community-prometheus-node-exporter + namespace: vynil-monitor + labels: + helm.sh/chart: prometheus-node-exporter-4.25.0 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: metrics + app.kubernetes.io/part-of: prometheus-node-exporter + app.kubernetes.io/name: prometheus-node-exporter + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "1.7.0" + jobLabel: node-exporter + release: prometheus-community +spec: + jobLabel: jobLabel + + selector: + matchLabels: + app.kubernetes.io/name: prometheus-node-exporter + app.kubernetes.io/instance: prometheus-community + attachMetadata: + node: false + endpoints: + - port: http-metrics + scheme: http \ No newline at end of file diff --git a/monitor/node-exporter/v1_ServiceAccount_prometheus-community-prometheus-node-exporter.yaml b/monitor/node-exporter/v1_ServiceAccount_prometheus-community-prometheus-node-exporter.yaml new file mode 100644 index 0000000..fc7a437 --- /dev/null +++ b/monitor/node-exporter/v1_ServiceAccount_prometheus-community-prometheus-node-exporter.yaml @@ -0,0 +1,17 @@ +--- +# Source: kube-prometheus-stack/charts/prometheus-node-exporter/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: prometheus-community-prometheus-node-exporter + namespace: vynil-monitor + labels: + helm.sh/chart: prometheus-node-exporter-4.25.0 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: metrics + app.kubernetes.io/part-of: prometheus-node-exporter + app.kubernetes.io/name: prometheus-node-exporter + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "1.7.0" + jobLabel: node-exporter + release: prometheus-community \ No newline at end of file diff --git a/monitor/node-exporter/v1_Service_prometheus-community-prometheus-node-exporter.yaml b/monitor/node-exporter/v1_Service_prometheus-community-prometheus-node-exporter.yaml new file mode 100644 index 0000000..2fc480a --- /dev/null +++ b/monitor/node-exporter/v1_Service_prometheus-community-prometheus-node-exporter.yaml @@ -0,0 +1,28 @@ +# Source: kube-prometheus-stack/charts/prometheus-node-exporter/templates/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: prometheus-community-prometheus-node-exporter + namespace: vynil-monitor + labels: + helm.sh/chart: prometheus-node-exporter-4.25.0 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: metrics + app.kubernetes.io/part-of: prometheus-node-exporter + app.kubernetes.io/name: prometheus-node-exporter + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "1.7.0" + jobLabel: node-exporter + release: prometheus-community + annotations: + prometheus.io/scrape: "true" +spec: + type: ClusterIP + ports: + - port: 9100 + targetPort: 9100 + protocol: TCP + name: http-metrics + selector: + app.kubernetes.io/name: prometheus-node-exporter + app.kubernetes.io/instance: prometheus-community \ No newline at end of file diff --git a/monitor/prometheus/datas.tf b/monitor/prometheus/datas.tf new file mode 100644 index 0000000..d01f2ca --- /dev/null +++ b/monitor/prometheus/datas.tf @@ -0,0 +1,222 @@ +locals { + common-labels = { + "vynil.solidite.fr/owner-name" = var.instance + "vynil.solidite.fr/owner-namespace" = var.namespace + "vynil.solidite.fr/owner-category" = var.category + "vynil.solidite.fr/owner-component" = var.component + "app.kubernetes.io/managed-by" = "vynil" + "app.kubernetes.io/instance" = var.instance + } + rb-patch = <<-EOF + - op: replace + path: /subjects/0/namespace + value: "${var.namespace}" + EOF +} + +data "kubernetes_secret_v1" "authentik" { + metadata { + name = "authentik" + namespace = "${var.domain}-auth" + } +} + +data "kustomization_overlay" "data" { + common_labels = local.common-labels + namespace = var.namespace + resources = [for file in fileset(path.module, "*.yaml"): file if file != "index.yaml" && length(regexall("ClusterRole",file))<1] + patches { + target { + kind = "Prometheus" + name = "prometheus-community-kube-prometheus" + } + patch = <<-EOF + apiVersion: monitoring.coreos.com/v1 + kind: Prometheus + metadata: + name: prometheus-community-kube-prometheus + spec: + image: "${var.images.prometheus.registry}/${var.images.prometheus.repository}:${var.images.prometheus.tag}" + version: ${var.images.prometheus.tag} + externalUrl: http://prometheus-community-kube-prometheus.${var.namespace}:9090 + replicas: ${var.replicas} + shards: ${var.shards} + logLevel: ${var.logLevel} + listenLocal: ${var.listenLocal} + enableAdminAPI: ${var.enableAdminAPI} + retention: "${var.retention}" + EOF + } + patches { + target { + kind = "ConfigMap" + name = "prometheus-community-kube-grafana-datasource" + } + patch = <<-EOF + apiVersion: v1 + kind: ConfigMap + metadata: + name: prometheus-community-kube-grafana-datasource + data: + datasource.yaml: |- + apiVersion: 1 + datasources: + - name: Prometheus + type: prometheus + uid: prometheus + url: http://prometheus-community-kube-prometheus.${var.namespace}:9090/ + access: proxy + isDefault: false + jsonData: + httpMethod: POST + timeInterval: 30s + EOF + } + patches { + target { + kind = "ServiceMonitor" + name = "prometheus-community-kube-prometheus" + } + patch = <<-EOF + - op: replace + path: /spec/namespaceSelector/matchNames/0 + value: "${var.namespace}" + EOF + } + + patches { + target { + kind = "PrometheusRule" + name = "prometheus-community-kube-prometheus" + } + patch = <<-EOF + apiVersion: monitoring.coreos.com/v1 + kind: PrometheusRule + metadata: + name: prometheus-community-kube-prometheus + spec: + groups: + - name: prometheus + rules: + - alert: PrometheusBadConfig + expr: |- + # Without max_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + max_over_time(prometheus_config_last_reload_successful{job="prometheus-community-kube-prometheus",namespace="${var.namespace}"}[5m]) == 0 + - alert: PrometheusSDRefreshFailure + expr: increase(prometheus_sd_refresh_failures_total{job="prometheus-community-kube-prometheus",namespace="${var.namespace}"}[10m]) > 0 + - alert: PrometheusNotificationQueueRunningFull + expr: |- + # Without min_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + ( + predict_linear(prometheus_notifications_queue_length{job="prometheus-community-kube-prometheus",namespace="${var.namespace}"}[5m], 60 * 30) + > + min_over_time(prometheus_notifications_queue_capacity{job="prometheus-community-kube-prometheus",namespace="${var.namespace}"}[5m]) + ) + - alert: PrometheusErrorSendingAlertsToSomeAlertmanagers + expr: |- + ( + rate(prometheus_notifications_errors_total{job="prometheus-community-kube-prometheus",namespace="${var.namespace}"}[5m]) + / + rate(prometheus_notifications_sent_total{job="prometheus-community-kube-prometheus",namespace="${var.namespace}"}[5m]) + ) + * 100 + > 1 + - alert: PrometheusNotConnectedToAlertmanagers + expr: |- + # Without max_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + max_over_time(prometheus_notifications_alertmanagers_discovered{job="prometheus-community-kube-prometheus",namespace="${var.namespace}"}[5m]) < 1 + - alert: PrometheusTSDBReloadsFailing + expr: increase(prometheus_tsdb_reloads_failures_total{job="prometheus-community-kube-prometheus",namespace="${var.namespace}"}[3h]) > 0 + - alert: PrometheusTSDBCompactionsFailing + expr: increase(prometheus_tsdb_compactions_failed_total{job="prometheus-community-kube-prometheus",namespace="${var.namespace}"}[3h]) > 0 + - alert: PrometheusNotIngestingSamples + expr: |- + ( + rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-community-kube-prometheus",namespace="${var.namespace}"}[5m]) <= 0 + and + ( + sum without(scrape_job) (prometheus_target_metadata_cache_entries{job="prometheus-community-kube-prometheus",namespace="${var.namespace}"}) > 0 + or + sum without(rule_group) (prometheus_rule_group_rules{job="prometheus-community-kube-prometheus",namespace="${var.namespace}"}) > 0 + ) + ) + - alert: PrometheusDuplicateTimestamps + expr: rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus-community-kube-prometheus",namespace="${var.namespace}"}[5m]) > 0 + - alert: PrometheusOutOfOrderTimestamps + expr: rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus-community-kube-prometheus",namespace="${var.namespace}"}[5m]) > 0 + - alert: PrometheusRemoteStorageFailures + expr: |- + ( + (rate(prometheus_remote_storage_failed_samples_total{job="prometheus-community-kube-prometheus",namespace="${var.namespace}"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus-community-kube-prometheus",namespace="${var.namespace}"}[5m])) + / + ( + (rate(prometheus_remote_storage_failed_samples_total{job="prometheus-community-kube-prometheus",namespace="${var.namespace}"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus-community-kube-prometheus",namespace="${var.namespace}"}[5m])) + + + (rate(prometheus_remote_storage_succeeded_samples_total{job="prometheus-community-kube-prometheus",namespace="${var.namespace}"}[5m]) or rate(prometheus_remote_storage_samples_total{job="prometheus-community-kube-prometheus",namespace="${var.namespace}"}[5m])) + ) + ) + * 100 + > 1 + - alert: PrometheusRemoteWriteBehind + expr: |- + # Without max_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + ( + max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="prometheus-community-kube-prometheus",namespace="${var.namespace}"}[5m]) + - ignoring(remote_name, url) group_right + max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="prometheus-community-kube-prometheus",namespace="${var.namespace}"}[5m]) + ) + > 120 + - alert: PrometheusRemoteWriteDesiredShards + expr: |- + # Without max_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + ( + max_over_time(prometheus_remote_storage_shards_desired{job="prometheus-community-kube-prometheus",namespace="${var.namespace}"}[5m]) + > + max_over_time(prometheus_remote_storage_shards_max{job="prometheus-community-kube-prometheus",namespace="${var.namespace}"}[5m]) + ) + - alert: PrometheusRuleFailures + expr: increase(prometheus_rule_evaluation_failures_total{job="prometheus-community-kube-prometheus",namespace="${var.namespace}"}[5m]) > 0 + - alert: PrometheusMissingRuleEvaluations + expr: increase(prometheus_rule_group_iterations_missed_total{job="prometheus-community-kube-prometheus",namespace="${var.namespace}"}[5m]) > 0 + - alert: PrometheusTargetLimitHit + expr: increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="prometheus-community-kube-prometheus",namespace="${var.namespace}"}[5m]) > 0 + - alert: PrometheusLabelLimitHit + expr: increase(prometheus_target_scrape_pool_exceeded_label_limits_total{job="prometheus-community-kube-prometheus",namespace="${var.namespace}"}[5m]) > 0 + - alert: PrometheusScrapeBodySizeLimitHit + expr: increase(prometheus_target_scrapes_exceeded_body_size_limit_total{job="prometheus-community-kube-prometheus",namespace="${var.namespace}"}[5m]) > 0 + - alert: PrometheusScrapeSampleLimitHit + expr: increase(prometheus_target_scrapes_exceeded_sample_limit_total{job="prometheus-community-kube-prometheus",namespace="${var.namespace}"}[5m]) > 0 + - alert: PrometheusTargetSyncFailure + expr: increase(prometheus_target_sync_failed_total{job="prometheus-community-kube-prometheus",namespace="${var.namespace}"}[30m]) > 0 + - alert: PrometheusHighQueryLoad + expr: avg_over_time(prometheus_engine_queries{job="prometheus-community-kube-prometheus",namespace="${var.namespace}"}[5m]) / max_over_time(prometheus_engine_queries_concurrent_max{job="prometheus-community-kube-prometheus",namespace="${var.namespace}"}[5m]) > 0.8 + - alert: PrometheusErrorSendingAlertsToAnyAlertmanager + expr: |- + min without (alertmanager) ( + rate(prometheus_notifications_errors_total{job="prometheus-community-kube-prometheus",namespace="${var.namespace}",alertmanager!~``}[5m]) + / + rate(prometheus_notifications_sent_total{job="prometheus-community-kube-prometheus",namespace="${var.namespace}",alertmanager!~``}[5m]) + ) + * 100 + > 3 + EOF + } +} + +data "kustomization_overlay" "data_no_ns" { + common_labels = local.common-labels + resources = [for file in fileset(path.module, "*.yaml"): file if length(regexall("ClusterRole",file))>0] + + patches { + target { + kind = "ClusterRoleBinding" + name = "prometheus-community-kube-prometheus" + } + patch = local.rb-patch + } +} diff --git a/monitor/prometheus/index.yaml b/monitor/prometheus/index.yaml new file mode 100644 index 0000000..9674969 --- /dev/null +++ b/monitor/prometheus/index.yaml @@ -0,0 +1,112 @@ +--- +apiVersion: vinyl.solidite.fr/v1beta1 +kind: Component +category: monitor +metadata: + name: prometheus + description: null +options: + sub-domain: + default: prometheus + examples: + - prometheus + type: string + domain: + default: your-company + examples: + - your-company + type: string + issuer: + default: letsencrypt-prod + examples: + - letsencrypt-prod + type: string + shards: + default: 1 + examples: + - 1 + type: integer + retention: + default: 10d + examples: + - 10d + type: string + ingress-class: + default: traefik + examples: + - traefik + type: string + domain-name: + default: your_company.com + examples: + - your_company.com + type: string + replicas: + default: 1 + examples: + - 1 + type: integer + logLevel: + default: info + examples: + - info + type: string + enableAdminAPI: + default: false + examples: + - false + type: boolean + images: + default: + prometheus: + pullPolicy: IfNotPresent + registry: quay.io + repository: prometheus/prometheus + tag: v2.49.1 + examples: + - prometheus: + pullPolicy: IfNotPresent + registry: quay.io + repository: prometheus/prometheus + tag: v2.49.1 + properties: + prometheus: + default: + pullPolicy: IfNotPresent + registry: quay.io + repository: prometheus/prometheus + tag: v2.49.1 + properties: + pullPolicy: + default: IfNotPresent + enum: + - Always + - Never + - IfNotPresent + type: string + registry: + default: quay.io + type: string + repository: + default: prometheus/prometheus + type: string + tag: + default: v2.49.1 + type: string + type: object + type: object + listenLocal: + default: false + examples: + - false + type: boolean +dependencies: [] +providers: + kubernetes: true + authentik: true + kubectl: true + postgresql: null + restapi: null + http: null + gitea: null +tfaddtype: null diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-config-reloaders.yaml b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-config-reloaders.yaml new file mode 100644 index 0000000..2e2bc25 --- /dev/null +++ b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-config-reloaders.yaml @@ -0,0 +1,31 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/config-reloaders.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-config-reloaders + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - name: config-reloaders + rules: + - alert: ConfigReloaderSidecarErrors + annotations: + description: 'Errors encountered while the {{$labels.pod}} config-reloader sidecar attempts to sync config in {{$labels.namespace}} namespace. + + As a result, configuration for service running in {{$labels.pod}} may be stale and cannot be updated anymore.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/configreloadersidecarerrors + summary: config-reloader sidecar has not had a successful reload for 10m + expr: max_over_time(reloader_last_reload_successful{namespace=~".+"}[5m]) == 0 + for: 10m + labels: + severity: warning \ No newline at end of file diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-general.rules.yaml b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-general.rules.yaml new file mode 100644 index 0000000..e97cbcb --- /dev/null +++ b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-general.rules.yaml @@ -0,0 +1,67 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/general.rules.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-general.rules + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - name: general.rules + rules: + - alert: TargetDown + annotations: + description: '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service }} targets in {{ $labels.namespace }} namespace are down.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/targetdown + summary: One or more targets are unreachable. + expr: 100 * (count(up == 0) BY (cluster, job, namespace, service) / count(up) BY (cluster, job, namespace, service)) > 10 + for: 10m + labels: + severity: warning + - alert: Watchdog + annotations: + description: 'This is an alert meant to ensure that the entire alerting pipeline is functional. + + This alert is always firing, therefore it should always be firing in Alertmanager + + and always fire against a receiver. There are integrations with various notification + + mechanisms that send a notification when this alert is not firing. For example the + + "DeadMansSnitch" integration in PagerDuty. + + ' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/watchdog + summary: An alert that should always be firing to certify that Alertmanager is working properly. + expr: vector(1) + labels: + severity: none + - alert: InfoInhibitor + annotations: + description: 'This is an alert that is used to inhibit info alerts. + + By themselves, the info-level alerts are sometimes very noisy, but they are relevant when combined with + + other alerts. + + This alert fires whenever there''s a severity="info" alert, and stops firing when another alert with a + + severity of ''warning'' or ''critical'' starts firing on the same namespace. + + This alert should be routed to a null receiver and configured to inhibit alerts with severity="info". + + ' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/infoinhibitor + summary: Info-level alert inhibition. + expr: ALERTS{severity = "info"} == 1 unless on (namespace) ALERTS{alertname != "InfoInhibitor", severity =~ "warning|critical", alertstate="firing"} == 1 + labels: + severity: none \ No newline at end of file diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-cpu-usage-seconds.yaml b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-cpu-usage-seconds.yaml new file mode 100644 index 0000000..2b966a4 --- /dev/null +++ b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-cpu-usage-seconds.yaml @@ -0,0 +1,27 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_cpu_usage_seconds_total.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-k8s.rules.container-cpu-usage-seconds + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - name: k8s.rules.container_cpu_usage_seconds_total + rules: + - expr: |- + sum by (cluster, namespace, pod, container) ( + irate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}[5m]) + ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( + 1, max by (cluster, namespace, pod, node) (kube_pod_info{node!=""}) + ) + record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate \ No newline at end of file diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-cache.yaml b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-cache.yaml new file mode 100644 index 0000000..3b3b9ef --- /dev/null +++ b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-cache.yaml @@ -0,0 +1,26 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_memory_cache.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-k8s.rules.container-memory-cache + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - name: k8s.rules.container_memory_cache + rules: + - expr: |- + container_memory_cache{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} + * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) (1, + max by (cluster, namespace, pod, node) (kube_pod_info{node!=""}) + ) + record: node_namespace_pod_container:container_memory_cache \ No newline at end of file diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-rss.yaml b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-rss.yaml new file mode 100644 index 0000000..79f9c33 --- /dev/null +++ b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-rss.yaml @@ -0,0 +1,26 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_memory_rss.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-k8s.rules.container-memory-rss + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - name: k8s.rules.container_memory_rss + rules: + - expr: |- + container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} + * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) (1, + max by (cluster, namespace, pod, node) (kube_pod_info{node!=""}) + ) + record: node_namespace_pod_container:container_memory_rss \ No newline at end of file diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-swap.yaml b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-swap.yaml new file mode 100644 index 0000000..84fad2f --- /dev/null +++ b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-swap.yaml @@ -0,0 +1,26 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_memory_swap.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-k8s.rules.container-memory-swap + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - name: k8s.rules.container_memory_swap + rules: + - expr: |- + container_memory_swap{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} + * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) (1, + max by (cluster, namespace, pod, node) (kube_pod_info{node!=""}) + ) + record: node_namespace_pod_container:container_memory_swap \ No newline at end of file diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-working-se.yaml b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-working-se.yaml new file mode 100644 index 0000000..e6ef243 --- /dev/null +++ b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-working-se.yaml @@ -0,0 +1,26 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_memory_working_set_bytes.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-k8s.rules.container-memory-working-se + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - name: k8s.rules.container_memory_working_set_bytes + rules: + - expr: |- + container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} + * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) (1, + max by (cluster, namespace, pod, node) (kube_pod_info{node!=""}) + ) + record: node_namespace_pod_container:container_memory_working_set_bytes \ No newline at end of file diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-resource.yaml b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-resource.yaml new file mode 100644 index 0000000..d364a79 --- /dev/null +++ b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-resource.yaml @@ -0,0 +1,88 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_resource.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-k8s.rules.container-resource + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - name: k8s.rules.container_resource + rules: + - expr: |- + kube_pod_container_resource_requests{resource="memory",job="kube-state-metrics"} * on (namespace, pod, cluster) + group_left() max by (namespace, pod, cluster) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + record: cluster:namespace:pod_memory:active:kube_pod_container_resource_requests + - expr: |- + sum by (namespace, cluster) ( + sum by (namespace, pod, cluster) ( + max by (namespace, pod, container, cluster) ( + kube_pod_container_resource_requests{resource="memory",job="kube-state-metrics"} + ) * on (namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( + kube_pod_status_phase{phase=~"Pending|Running"} == 1 + ) + ) + ) + record: namespace_memory:kube_pod_container_resource_requests:sum + - expr: |- + kube_pod_container_resource_requests{resource="cpu",job="kube-state-metrics"} * on (namespace, pod, cluster) + group_left() max by (namespace, pod, cluster) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests + - expr: |- + sum by (namespace, cluster) ( + sum by (namespace, pod, cluster) ( + max by (namespace, pod, container, cluster) ( + kube_pod_container_resource_requests{resource="cpu",job="kube-state-metrics"} + ) * on (namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( + kube_pod_status_phase{phase=~"Pending|Running"} == 1 + ) + ) + ) + record: namespace_cpu:kube_pod_container_resource_requests:sum + - expr: |- + kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"} * on (namespace, pod, cluster) + group_left() max by (namespace, pod, cluster) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + record: cluster:namespace:pod_memory:active:kube_pod_container_resource_limits + - expr: |- + sum by (namespace, cluster) ( + sum by (namespace, pod, cluster) ( + max by (namespace, pod, container, cluster) ( + kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"} + ) * on (namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( + kube_pod_status_phase{phase=~"Pending|Running"} == 1 + ) + ) + ) + record: namespace_memory:kube_pod_container_resource_limits:sum + - expr: |- + kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"} * on (namespace, pod, cluster) + group_left() max by (namespace, pod, cluster) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits + - expr: |- + sum by (namespace, cluster) ( + sum by (namespace, pod, cluster) ( + max by (namespace, pod, container, cluster) ( + kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"} + ) * on (namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( + kube_pod_status_phase{phase=~"Pending|Running"} == 1 + ) + ) + ) + record: namespace_cpu:kube_pod_container_resource_limits:sum \ No newline at end of file diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.pod-owner.yaml b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.pod-owner.yaml new file mode 100644 index 0000000..97d5bd9 --- /dev/null +++ b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.pod-owner.yaml @@ -0,0 +1,67 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.pod_owner.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-k8s.rules.pod-owner + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - name: k8s.rules.pod_owner + rules: + - expr: |- + max by (cluster, namespace, workload, pod) ( + label_replace( + label_replace( + kube_pod_owner{job="kube-state-metrics", owner_kind="ReplicaSet"}, + "replicaset", "$1", "owner_name", "(.*)" + ) * on (replicaset, namespace) group_left(owner_name) topk by (replicaset, namespace) ( + 1, max by (replicaset, namespace, owner_name) ( + kube_replicaset_owner{job="kube-state-metrics"} + ) + ), + "workload", "$1", "owner_name", "(.*)" + ) + ) + labels: + workload_type: deployment + record: namespace_workload_pod:kube_pod_owner:relabel + - expr: |- + max by (cluster, namespace, workload, pod) ( + label_replace( + kube_pod_owner{job="kube-state-metrics", owner_kind="DaemonSet"}, + "workload", "$1", "owner_name", "(.*)" + ) + ) + labels: + workload_type: daemonset + record: namespace_workload_pod:kube_pod_owner:relabel + - expr: |- + max by (cluster, namespace, workload, pod) ( + label_replace( + kube_pod_owner{job="kube-state-metrics", owner_kind="StatefulSet"}, + "workload", "$1", "owner_name", "(.*)" + ) + ) + labels: + workload_type: statefulset + record: namespace_workload_pod:kube_pod_owner:relabel + - expr: |- + max by (cluster, namespace, workload, pod) ( + label_replace( + kube_pod_owner{job="kube-state-metrics", owner_kind="Job"}, + "workload", "$1", "owner_name", "(.*)" + ) + ) + labels: + workload_type: job + record: namespace_workload_pod:kube_pod_owner:relabel \ No newline at end of file diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-prometheus-general.rules.yaml b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-prometheus-general.rules.yaml new file mode 100644 index 0000000..94be7d6 --- /dev/null +++ b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-prometheus-general.rules.yaml @@ -0,0 +1,24 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kube-prometheus-general.rules.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-kube-prometheus-general.rules + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - name: kube-prometheus-general.rules + rules: + - expr: count without(instance, pod, node) (up == 1) + record: count:up1 + - expr: count without(instance, pod, node) (up == 0) + record: count:up0 \ No newline at end of file diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-prometheus-node-recording.rules.yaml b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-prometheus-node-recording.rules.yaml new file mode 100644 index 0000000..06cb036 --- /dev/null +++ b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-prometheus-node-recording.rules.yaml @@ -0,0 +1,32 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kube-prometheus-node-recording.rules.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-kube-prometheus-node-recording.rules + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - name: kube-prometheus-node-recording.rules + rules: + - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[3m])) BY (instance) + record: instance:node_cpu:rate:sum + - expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance) + record: instance:node_network_receive_bytes:rate:sum + - expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance) + record: instance:node_network_transmit_bytes:rate:sum + - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance) + record: instance:node_cpu:ratio + - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) + record: cluster:node_cpu:sum_rate5m + - expr: cluster:node_cpu:sum_rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu)) + record: cluster:node_cpu:ratio \ No newline at end of file diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-state-metrics.yaml b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-state-metrics.yaml new file mode 100644 index 0000000..11b23d4 --- /dev/null +++ b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-state-metrics.yaml @@ -0,0 +1,68 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kube-state-metrics.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-kube-state-metrics + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - name: kube-state-metrics + rules: + - alert: KubeStateMetricsListErrors + annotations: + description: kube-state-metrics is experiencing errors at an elevated rate in list operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricslisterrors + summary: kube-state-metrics is experiencing errors in list operations. + expr: |- + (sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m])) by (cluster) + / + sum(rate(kube_state_metrics_list_total{job="kube-state-metrics"}[5m])) by (cluster)) + > 0.01 + for: 15m + labels: + severity: critical + - alert: KubeStateMetricsWatchErrors + annotations: + description: kube-state-metrics is experiencing errors at an elevated rate in watch operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricswatcherrors + summary: kube-state-metrics is experiencing errors in watch operations. + expr: |- + (sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics",result="error"}[5m])) by (cluster) + / + sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics"}[5m])) by (cluster)) + > 0.01 + for: 15m + labels: + severity: critical + - alert: KubeStateMetricsShardingMismatch + annotations: + description: kube-state-metrics pods are running with different --total-shards configuration, some Kubernetes objects may be exposed multiple times or not exposed at all. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricsshardingmismatch + summary: kube-state-metrics sharding is misconfigured. + expr: stdvar (kube_state_metrics_total_shards{job="kube-state-metrics"}) by (cluster) != 0 + for: 15m + labels: + severity: critical + - alert: KubeStateMetricsShardsMissing + annotations: + description: kube-state-metrics shards are missing, some Kubernetes objects are not being exposed. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricsshardsmissing + summary: kube-state-metrics shards are missing. + expr: |- + 2^max(kube_state_metrics_total_shards{job="kube-state-metrics"}) by (cluster) - 1 + - + sum( 2 ^ max by (cluster, shard_ordinal) (kube_state_metrics_shard_ordinal{job="kube-state-metrics"}) ) by (cluster) + != 0 + for: 15m + labels: + severity: critical \ No newline at end of file diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubelet.rules.yaml b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubelet.rules.yaml new file mode 100644 index 0000000..e8bdb8c --- /dev/null +++ b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubelet.rules.yaml @@ -0,0 +1,32 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubelet.rules.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-kubelet.rules + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - name: kubelet.rules + rules: + - expr: histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (cluster, instance, le) * on (cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) + labels: + quantile: '0.99' + record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile + - expr: histogram_quantile(0.9, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (cluster, instance, le) * on (cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) + labels: + quantile: '0.9' + record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile + - expr: histogram_quantile(0.5, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (cluster, instance, le) * on (cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) + labels: + quantile: '0.5' + record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile \ No newline at end of file diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-apps.yaml b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-apps.yaml new file mode 100644 index 0000000..dc3ca81 --- /dev/null +++ b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-apps.yaml @@ -0,0 +1,258 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-apps.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-kubernetes-apps + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - name: kubernetes-apps + rules: + - alert: KubePodCrashLooping + annotations: + description: 'Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is in waiting state (reason: "CrashLoopBackOff").' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodcrashlooping + summary: Pod is crash looping. + expr: max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff", job="kube-state-metrics", namespace=~".*"}[5m]) >= 1 + for: 15m + labels: + severity: warning + - alert: KubePodNotReady + annotations: + description: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state for longer than 15 minutes. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodnotready + summary: Pod has been in a non-ready state for more than 15 minutes. + expr: |- + sum by (namespace, pod, cluster) ( + max by (namespace, pod, cluster) ( + kube_pod_status_phase{job="kube-state-metrics", namespace=~".*", phase=~"Pending|Unknown|Failed"} + ) * on (namespace, pod, cluster) group_left(owner_kind) topk by (namespace, pod, cluster) ( + 1, max by (namespace, pod, owner_kind, cluster) (kube_pod_owner{owner_kind!="Job"}) + ) + ) > 0 + for: 15m + labels: + severity: warning + - alert: KubeDeploymentGenerationMismatch + annotations: + description: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment }} does not match, this indicates that the Deployment has failed but has not been rolled back. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentgenerationmismatch + summary: Deployment generation mismatch due to possible roll-back + expr: |- + kube_deployment_status_observed_generation{job="kube-state-metrics", namespace=~".*"} + != + kube_deployment_metadata_generation{job="kube-state-metrics", namespace=~".*"} + for: 15m + labels: + severity: warning + - alert: KubeDeploymentReplicasMismatch + annotations: + description: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not matched the expected number of replicas for longer than 15 minutes. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentreplicasmismatch + summary: Deployment has not matched the expected number of replicas. + expr: |- + ( + kube_deployment_spec_replicas{job="kube-state-metrics", namespace=~".*"} + > + kube_deployment_status_replicas_available{job="kube-state-metrics", namespace=~".*"} + ) and ( + changes(kube_deployment_status_replicas_updated{job="kube-state-metrics", namespace=~".*"}[10m]) + == + 0 + ) + for: 15m + labels: + severity: warning + - alert: KubeDeploymentRolloutStuck + annotations: + description: Rollout of deployment {{ $labels.namespace }}/{{ $labels.deployment }} is not progressing for longer than 15 minutes. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentrolloutstuck + summary: Deployment rollout is not progressing. + expr: |- + kube_deployment_status_condition{condition="Progressing", status="false",job="kube-state-metrics", namespace=~".*"} + != 0 + for: 15m + labels: + severity: warning + - alert: KubeStatefulSetReplicasMismatch + annotations: + description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not matched the expected number of replicas for longer than 15 minutes. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetreplicasmismatch + summary: StatefulSet has not matched the expected number of replicas. + expr: |- + ( + kube_statefulset_status_replicas_ready{job="kube-state-metrics", namespace=~".*"} + != + kube_statefulset_status_replicas{job="kube-state-metrics", namespace=~".*"} + ) and ( + changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~".*"}[10m]) + == + 0 + ) + for: 15m + labels: + severity: warning + - alert: KubeStatefulSetGenerationMismatch + annotations: + description: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset }} does not match, this indicates that the StatefulSet has failed but has not been rolled back. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetgenerationmismatch + summary: StatefulSet generation mismatch due to possible roll-back + expr: |- + kube_statefulset_status_observed_generation{job="kube-state-metrics", namespace=~".*"} + != + kube_statefulset_metadata_generation{job="kube-state-metrics", namespace=~".*"} + for: 15m + labels: + severity: warning + - alert: KubeStatefulSetUpdateNotRolledOut + annotations: + description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update has not been rolled out. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetupdatenotrolledout + summary: StatefulSet update has not been rolled out. + expr: |- + ( + max without (revision) ( + kube_statefulset_status_current_revision{job="kube-state-metrics", namespace=~".*"} + unless + kube_statefulset_status_update_revision{job="kube-state-metrics", namespace=~".*"} + ) + * + ( + kube_statefulset_replicas{job="kube-state-metrics", namespace=~".*"} + != + kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~".*"} + ) + ) and ( + changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~".*"}[5m]) + == + 0 + ) + for: 15m + labels: + severity: warning + - alert: KubeDaemonSetRolloutStuck + annotations: + description: DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not finished or progressed for at least 15 minutes. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetrolloutstuck + summary: DaemonSet rollout is stuck. + expr: |- + ( + ( + kube_daemonset_status_current_number_scheduled{job="kube-state-metrics", namespace=~".*"} + != + kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~".*"} + ) or ( + kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~".*"} + != + 0 + ) or ( + kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics", namespace=~".*"} + != + kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~".*"} + ) or ( + kube_daemonset_status_number_available{job="kube-state-metrics", namespace=~".*"} + != + kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~".*"} + ) + ) and ( + changes(kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics", namespace=~".*"}[5m]) + == + 0 + ) + for: 15m + labels: + severity: warning + - alert: KubeContainerWaiting + annotations: + description: pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container {{ $labels.container}} has been in waiting state for longer than 1 hour. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontainerwaiting + summary: Pod container waiting longer than 1 hour + expr: sum by (namespace, pod, container, cluster) (kube_pod_container_status_waiting_reason{job="kube-state-metrics", namespace=~".*"}) > 0 + for: 1h + labels: + severity: warning + - alert: KubeDaemonSetNotScheduled + annotations: + description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are not scheduled.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetnotscheduled + summary: DaemonSet pods are not scheduled. + expr: |- + kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~".*"} + - + kube_daemonset_status_current_number_scheduled{job="kube-state-metrics", namespace=~".*"} > 0 + for: 10m + labels: + severity: warning + - alert: KubeDaemonSetMisScheduled + annotations: + description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are running where they are not supposed to run.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetmisscheduled + summary: DaemonSet pods are misscheduled. + expr: kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~".*"} > 0 + for: 15m + labels: + severity: warning + - alert: KubeJobNotCompleted + annotations: + description: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than {{ "43200" | humanizeDuration }} to complete. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobnotcompleted + summary: Job did not complete in time + expr: |- + time() - max by (namespace, job_name, cluster) (kube_job_status_start_time{job="kube-state-metrics", namespace=~".*"} + and + kube_job_status_active{job="kube-state-metrics", namespace=~".*"} > 0) > 43200 + labels: + severity: warning + - alert: KubeJobFailed + annotations: + description: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete. Removing failed job after investigation should clear this alert. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobfailed + summary: Job failed to complete. + expr: kube_job_failed{job="kube-state-metrics", namespace=~".*"} > 0 + for: 15m + labels: + severity: warning + - alert: KubeHpaReplicasMismatch + annotations: + description: HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} has not matched the desired number of replicas for longer than 15 minutes. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpareplicasmismatch + summary: HPA has not matched desired number of replicas. + expr: |- + (kube_horizontalpodautoscaler_status_desired_replicas{job="kube-state-metrics", namespace=~".*"} + != + kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"}) + and + (kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"} + > + kube_horizontalpodautoscaler_spec_min_replicas{job="kube-state-metrics", namespace=~".*"}) + and + (kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"} + < + kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics", namespace=~".*"}) + and + changes(kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"}[15m]) == 0 + for: 15m + labels: + severity: warning + - alert: KubeHpaMaxedOut + annotations: + description: HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} has been running at max replicas for longer than 15 minutes. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpamaxedout + summary: HPA is running at max replicas + expr: |- + kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"} + == + kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics", namespace=~".*"} + for: 15m + labels: + severity: warning \ No newline at end of file diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-resources.yaml b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-resources.yaml new file mode 100644 index 0000000..5669777 --- /dev/null +++ b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-resources.yaml @@ -0,0 +1,122 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-resources.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-kubernetes-resources + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - name: kubernetes-resources + rules: + - alert: KubeCPUOvercommit + annotations: + description: Cluster {{ $labels.cluster }} has overcommitted CPU resource requests for Pods by {{ $value }} CPU shares and cannot tolerate node failure. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuovercommit + summary: Cluster has overcommitted CPU resource requests. + expr: |- + sum(namespace_cpu:kube_pod_container_resource_requests:sum{job="kube-state-metrics",}) by (cluster) - (sum(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by (cluster) - max(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by (cluster)) > 0 + and + (sum(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by (cluster) - max(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by (cluster)) > 0 + for: 10m + labels: + severity: warning + - alert: KubeMemoryOvercommit + annotations: + description: Cluster {{ $labels.cluster }} has overcommitted memory resource requests for Pods by {{ $value | humanize }} bytes and cannot tolerate node failure. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryovercommit + summary: Cluster has overcommitted memory resource requests. + expr: |- + sum(namespace_memory:kube_pod_container_resource_requests:sum{}) by (cluster) - (sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster) - max(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster)) > 0 + and + (sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster) - max(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster)) > 0 + for: 10m + labels: + severity: warning + - alert: KubeCPUQuotaOvercommit + annotations: + description: Cluster {{ $labels.cluster }} has overcommitted CPU resource requests for Namespaces. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuquotaovercommit + summary: Cluster has overcommitted CPU resource requests. + expr: |- + sum(min without(resource) (kube_resourcequota{job="kube-state-metrics", type="hard", resource=~"(cpu|requests.cpu)"})) by (cluster) + / + sum(kube_node_status_allocatable{resource="cpu", job="kube-state-metrics"}) by (cluster) + > 1.5 + for: 5m + labels: + severity: warning + - alert: KubeMemoryQuotaOvercommit + annotations: + description: Cluster {{ $labels.cluster }} has overcommitted memory resource requests for Namespaces. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryquotaovercommit + summary: Cluster has overcommitted memory resource requests. + expr: |- + sum(min without(resource) (kube_resourcequota{job="kube-state-metrics", type="hard", resource=~"(memory|requests.memory)"})) by (cluster) + / + sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster) + > 1.5 + for: 5m + labels: + severity: warning + - alert: KubeQuotaAlmostFull + annotations: + description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaalmostfull + summary: Namespace quota is going to be full. + expr: |- + kube_resourcequota{job="kube-state-metrics", type="used"} + / ignoring(instance, job, type) + (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0) + > 0.9 < 1 + for: 15m + labels: + severity: info + - alert: KubeQuotaFullyUsed + annotations: + description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotafullyused + summary: Namespace quota is fully used. + expr: |- + kube_resourcequota{job="kube-state-metrics", type="used"} + / ignoring(instance, job, type) + (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0) + == 1 + for: 15m + labels: + severity: info + - alert: KubeQuotaExceeded + annotations: + description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaexceeded + summary: Namespace quota has exceeded the limits. + expr: |- + kube_resourcequota{job="kube-state-metrics", type="used"} + / ignoring(instance, job, type) + (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0) + > 1 + for: 15m + labels: + severity: warning + - alert: CPUThrottlingHigh + annotations: + description: '{{ $value | humanizePercentage }} throttling of CPU in namespace {{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod }}.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/cputhrottlinghigh + summary: Processes experience elevated CPU throttling. + expr: |- + sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (cluster, container, pod, namespace) + / + sum(increase(container_cpu_cfs_periods_total{}[5m])) by (cluster, container, pod, namespace) + > ( 25 / 100 ) + for: 15m + labels: + severity: info \ No newline at end of file diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-storage.yaml b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-storage.yaml new file mode 100644 index 0000000..5a6ee38 --- /dev/null +++ b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-storage.yaml @@ -0,0 +1,113 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-storage.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-kubernetes-storage + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - name: kubernetes-storage + rules: + - alert: KubePersistentVolumeFillingUp + annotations: + description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.cluster }} is only {{ $value | humanizePercentage }} free. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup + summary: PersistentVolume is filling up. + expr: |- + ( + kubelet_volume_stats_available_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"} + / + kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"} + ) < 0.03 + and + kubelet_volume_stats_used_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0 + unless on (cluster, namespace, persistentvolumeclaim) + kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 + unless on (cluster, namespace, persistentvolumeclaim) + kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 + for: 1m + labels: + severity: critical + - alert: KubePersistentVolumeFillingUp + annotations: + description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.cluster }} is expected to fill up within four days. Currently {{ $value | humanizePercentage }} is available. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup + summary: PersistentVolume is filling up. + expr: |- + ( + kubelet_volume_stats_available_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"} + / + kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"} + ) < 0.15 + and + kubelet_volume_stats_used_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0 + and + predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 + unless on (cluster, namespace, persistentvolumeclaim) + kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 + unless on (cluster, namespace, persistentvolumeclaim) + kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 + for: 1h + labels: + severity: warning + - alert: KubePersistentVolumeInodesFillingUp + annotations: + description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.cluster }} only has {{ $value | humanizePercentage }} free inodes. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeinodesfillingup + summary: PersistentVolumeInodes are filling up. + expr: |- + ( + kubelet_volume_stats_inodes_free{job="kubelet", namespace=~".*", metrics_path="/metrics"} + / + kubelet_volume_stats_inodes{job="kubelet", namespace=~".*", metrics_path="/metrics"} + ) < 0.03 + and + kubelet_volume_stats_inodes_used{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0 + unless on (cluster, namespace, persistentvolumeclaim) + kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 + unless on (cluster, namespace, persistentvolumeclaim) + kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 + for: 1m + labels: + severity: critical + - alert: KubePersistentVolumeInodesFillingUp + annotations: + description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.cluster }} is expected to run out of inodes within four days. Currently {{ $value | humanizePercentage }} of its inodes are free. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeinodesfillingup + summary: PersistentVolumeInodes are filling up. + expr: |- + ( + kubelet_volume_stats_inodes_free{job="kubelet", namespace=~".*", metrics_path="/metrics"} + / + kubelet_volume_stats_inodes{job="kubelet", namespace=~".*", metrics_path="/metrics"} + ) < 0.15 + and + kubelet_volume_stats_inodes_used{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0 + and + predict_linear(kubelet_volume_stats_inodes_free{job="kubelet", namespace=~".*", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 + unless on (cluster, namespace, persistentvolumeclaim) + kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 + unless on (cluster, namespace, persistentvolumeclaim) + kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 + for: 1h + labels: + severity: warning + - alert: KubePersistentVolumeErrors + annotations: + description: The persistent volume {{ $labels.persistentvolume }} on Cluster {{ $labels.cluster }} has status {{ $labels.phase }}. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeerrors + summary: PersistentVolume is having issues with provisioning. + expr: kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} > 0 + for: 5m + labels: + severity: critical \ No newline at end of file diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-apiserver.yaml b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-apiserver.yaml new file mode 100644 index 0000000..47dbf5a --- /dev/null +++ b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-apiserver.yaml @@ -0,0 +1,64 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-apiserver.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-kubernetes-system-apiserver + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - name: kubernetes-system-apiserver + rules: + - alert: KubeClientCertificateExpiration + annotations: + description: A client certificate used to authenticate to kubernetes apiserver is expiring in less than 7.0 days. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration + summary: Client certificate is about to expire. + expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on (job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800 + for: 5m + labels: + severity: warning + - alert: KubeClientCertificateExpiration + annotations: + description: A client certificate used to authenticate to kubernetes apiserver is expiring in less than 24.0 hours. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration + summary: Client certificate is about to expire. + expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on (job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400 + for: 5m + labels: + severity: critical + - alert: KubeAggregatedAPIErrors + annotations: + description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. It has appeared unavailable {{ $value | humanize }} times averaged over the past 10m. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapierrors + summary: Kubernetes aggregated API has reported errors. + expr: sum by (name, namespace, cluster)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[10m])) > 4 + labels: + severity: warning + - alert: KubeAggregatedAPIDown + annotations: + description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only {{ $value | humanize }}% available over the last 10m. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapidown + summary: Kubernetes aggregated API is down. + expr: (1 - max by (name, namespace, cluster)(avg_over_time(aggregator_unavailable_apiservice{job="apiserver"}[10m]))) * 100 < 85 + for: 5m + labels: + severity: warning + - alert: KubeAPITerminatedRequests + annotations: + description: The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapiterminatedrequests + summary: The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests. + expr: sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m])) / ( sum(rate(apiserver_request_total{job="apiserver"}[10m])) + sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m])) ) > 0.20 + for: 5m + labels: + severity: warning \ No newline at end of file diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-kube-proxy.yaml b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-kube-proxy.yaml new file mode 100644 index 0000000..60cf44d --- /dev/null +++ b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-kube-proxy.yaml @@ -0,0 +1,29 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-kube-proxy.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-kubernetes-system-kube-proxy + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - name: kubernetes-system-kube-proxy + rules: + - alert: KubeProxyDown + annotations: + description: KubeProxy has disappeared from Prometheus target discovery. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeproxydown + summary: Target disappeared from Prometheus target discovery. + expr: absent(up{job="kube-proxy"} == 1) + for: 15m + labels: + severity: critical \ No newline at end of file diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-kubelet.yaml b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-kubelet.yaml new file mode 100644 index 0000000..829d7a3 --- /dev/null +++ b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-kubelet.yaml @@ -0,0 +1,140 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-kubelet.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-kubernetes-system-kubelet + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - name: kubernetes-system-kubelet + rules: + - alert: KubeNodeNotReady + annotations: + description: '{{ $labels.node }} has been unready for more than 15 minutes.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodenotready + summary: Node is not ready. + expr: kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0 + for: 15m + labels: + severity: warning + - alert: KubeNodeUnreachable + annotations: + description: '{{ $labels.node }} is unreachable and some workloads may be rescheduled.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodeunreachable + summary: Node is unreachable. + expr: (kube_node_spec_taint{job="kube-state-metrics",key="node.kubernetes.io/unreachable",effect="NoSchedule"} unless ignoring(key,value) kube_node_spec_taint{job="kube-state-metrics",key=~"ToBeDeletedByClusterAutoscaler|cloud.google.com/impending-node-termination|aws-node-termination-handler/spot-itn"}) == 1 + for: 15m + labels: + severity: warning + - alert: KubeletTooManyPods + annotations: + description: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage }} of its Pod capacity. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubelettoomanypods + summary: Kubelet is running at capacity. + expr: |- + count by (cluster, node) ( + (kube_pod_status_phase{job="kube-state-metrics",phase="Running"} == 1) * on (instance,pod,namespace,cluster) group_left(node) topk by (instance,pod,namespace,cluster) (1, kube_pod_info{job="kube-state-metrics"}) + ) + / + max by (cluster, node) ( + kube_node_status_capacity{job="kube-state-metrics",resource="pods"} != 1 + ) > 0.95 + for: 15m + labels: + severity: info + - alert: KubeNodeReadinessFlapping + annotations: + description: The readiness status of node {{ $labels.node }} has changed {{ $value }} times in the last 15 minutes. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodereadinessflapping + summary: Node readiness status is flapping. + expr: sum(changes(kube_node_status_condition{job="kube-state-metrics",status="true",condition="Ready"}[15m])) by (cluster, node) > 2 + for: 15m + labels: + severity: warning + - alert: KubeletPlegDurationHigh + annotations: + description: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }}. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletplegdurationhigh + summary: Kubelet Pod Lifecycle Event Generator is taking too long to relist. + expr: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10 + for: 5m + labels: + severity: warning + - alert: KubeletPodStartUpLatencyHigh + annotations: + description: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }}. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletpodstartuplatencyhigh + summary: Kubelet Pod startup latency is too high. + expr: histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (cluster, instance, le)) * on (cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60 + for: 15m + labels: + severity: warning + - alert: KubeletClientCertificateExpiration + annotations: + description: Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificateexpiration + summary: Kubelet client certificate is about to expire. + expr: kubelet_certificate_manager_client_ttl_seconds < 604800 + labels: + severity: warning + - alert: KubeletClientCertificateExpiration + annotations: + description: Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificateexpiration + summary: Kubelet client certificate is about to expire. + expr: kubelet_certificate_manager_client_ttl_seconds < 86400 + labels: + severity: critical + - alert: KubeletServerCertificateExpiration + annotations: + description: Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificateexpiration + summary: Kubelet server certificate is about to expire. + expr: kubelet_certificate_manager_server_ttl_seconds < 604800 + labels: + severity: warning + - alert: KubeletServerCertificateExpiration + annotations: + description: Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificateexpiration + summary: Kubelet server certificate is about to expire. + expr: kubelet_certificate_manager_server_ttl_seconds < 86400 + labels: + severity: critical + - alert: KubeletClientCertificateRenewalErrors + annotations: + description: Kubelet on node {{ $labels.node }} has failed to renew its client certificate ({{ $value | humanize }} errors in the last 5 minutes). + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificaterenewalerrors + summary: Kubelet has failed to renew its client certificate. + expr: increase(kubelet_certificate_manager_client_expiration_renew_errors[5m]) > 0 + for: 15m + labels: + severity: warning + - alert: KubeletServerCertificateRenewalErrors + annotations: + description: Kubelet on node {{ $labels.node }} has failed to renew its server certificate ({{ $value | humanize }} errors in the last 5 minutes). + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificaterenewalerrors + summary: Kubelet has failed to renew its server certificate. + expr: increase(kubelet_server_expiration_renew_errors[5m]) > 0 + for: 15m + labels: + severity: warning + - alert: KubeletDown + annotations: + description: Kubelet has disappeared from Prometheus target discovery. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletdown + summary: Target disappeared from Prometheus target discovery. + expr: absent(up{job="kubelet", metrics_path="/metrics"} == 1) + for: 15m + labels: + severity: critical \ No newline at end of file diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system.yaml b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system.yaml new file mode 100644 index 0000000..31fa40c --- /dev/null +++ b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system.yaml @@ -0,0 +1,42 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-kubernetes-system + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - name: kubernetes-system + rules: + - alert: KubeVersionMismatch + annotations: + description: There are {{ $value }} different semantic versions of Kubernetes components running. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeversionmismatch + summary: Different semantic versions of Kubernetes components running. + expr: count by (cluster) (count by (git_version, cluster) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"git_version","$1","git_version","(v[0-9]*.[0-9]*).*"))) > 1 + for: 15m + labels: + severity: warning + - alert: KubeClientErrors + annotations: + description: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}' is experiencing {{ $value | humanizePercentage }} errors.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclienterrors + summary: Kubernetes API server client is experiencing errors. + expr: |- + (sum(rate(rest_client_requests_total{job="apiserver",code=~"5.."}[5m])) by (cluster, instance, job, namespace) + / + sum(rate(rest_client_requests_total{job="apiserver"}[5m])) by (cluster, instance, job, namespace)) + > 0.01 + for: 15m + labels: + severity: warning \ No newline at end of file diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-prometheus.yaml b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-prometheus.yaml new file mode 100644 index 0000000..31836a6 --- /dev/null +++ b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-prometheus.yaml @@ -0,0 +1,280 @@ +# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/prometheus.yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: prometheus-community-kube-prometheus + namespace: vynil-monitor + labels: + app: kube-prometheus-stack + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + groups: + - name: prometheus + rules: + - alert: PrometheusBadConfig + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to reload its configuration. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusbadconfig + summary: Failed Prometheus configuration reload. + expr: |- + # Without max_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + max_over_time(prometheus_config_last_reload_successful{job="prometheus-community-kube-prometheus",namespace="vynil-monitor"}[5m]) == 0 + for: 10m + labels: + severity: critical + - alert: PrometheusSDRefreshFailure + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to refresh SD with mechanism {{$labels.mechanism}}. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheussdrefreshfailure + summary: Failed Prometheus SD refresh. + expr: increase(prometheus_sd_refresh_failures_total{job="prometheus-community-kube-prometheus",namespace="vynil-monitor"}[10m]) > 0 + for: 20m + labels: + severity: warning + - alert: PrometheusNotificationQueueRunningFull + annotations: + description: Alert notification queue of Prometheus {{$labels.namespace}}/{{$labels.pod}} is running full. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusnotificationqueuerunningfull + summary: Prometheus alert notification queue predicted to run full in less than 30m. + expr: |- + # Without min_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + ( + predict_linear(prometheus_notifications_queue_length{job="prometheus-community-kube-prometheus",namespace="vynil-monitor"}[5m], 60 * 30) + > + min_over_time(prometheus_notifications_queue_capacity{job="prometheus-community-kube-prometheus",namespace="vynil-monitor"}[5m]) + ) + for: 15m + labels: + severity: warning + - alert: PrometheusErrorSendingAlertsToSomeAlertmanagers + annotations: + description: '{{ printf "%.1f" $value }}% errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to Alertmanager {{$labels.alertmanager}}.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuserrorsendingalertstosomealertmanagers + summary: Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager. + expr: |- + ( + rate(prometheus_notifications_errors_total{job="prometheus-community-kube-prometheus",namespace="vynil-monitor"}[5m]) + / + rate(prometheus_notifications_sent_total{job="prometheus-community-kube-prometheus",namespace="vynil-monitor"}[5m]) + ) + * 100 + > 1 + for: 15m + labels: + severity: warning + - alert: PrometheusNotConnectedToAlertmanagers + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not connected to any Alertmanagers. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusnotconnectedtoalertmanagers + summary: Prometheus is not connected to any Alertmanagers. + expr: |- + # Without max_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + max_over_time(prometheus_notifications_alertmanagers_discovered{job="prometheus-community-kube-prometheus",namespace="vynil-monitor"}[5m]) < 1 + for: 10m + labels: + severity: warning + - alert: PrometheusTSDBReloadsFailing + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected {{$value | humanize}} reload failures over the last 3h. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustsdbreloadsfailing + summary: Prometheus has issues reloading blocks from disk. + expr: increase(prometheus_tsdb_reloads_failures_total{job="prometheus-community-kube-prometheus",namespace="vynil-monitor"}[3h]) > 0 + for: 4h + labels: + severity: warning + - alert: PrometheusTSDBCompactionsFailing + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected {{$value | humanize}} compaction failures over the last 3h. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustsdbcompactionsfailing + summary: Prometheus has issues compacting blocks. + expr: increase(prometheus_tsdb_compactions_failed_total{job="prometheus-community-kube-prometheus",namespace="vynil-monitor"}[3h]) > 0 + for: 4h + labels: + severity: warning + - alert: PrometheusNotIngestingSamples + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not ingesting samples. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusnotingestingsamples + summary: Prometheus is not ingesting samples. + expr: |- + ( + rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-community-kube-prometheus",namespace="vynil-monitor"}[5m]) <= 0 + and + ( + sum without(scrape_job) (prometheus_target_metadata_cache_entries{job="prometheus-community-kube-prometheus",namespace="vynil-monitor"}) > 0 + or + sum without(rule_group) (prometheus_rule_group_rules{job="prometheus-community-kube-prometheus",namespace="vynil-monitor"}) > 0 + ) + ) + for: 10m + labels: + severity: warning + - alert: PrometheusDuplicateTimestamps + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping {{ printf "%.4g" $value }} samples/s with different values but duplicated timestamp. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusduplicatetimestamps + summary: Prometheus is dropping samples with duplicate timestamps. + expr: rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus-community-kube-prometheus",namespace="vynil-monitor"}[5m]) > 0 + for: 10m + labels: + severity: warning + - alert: PrometheusOutOfOrderTimestamps + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping {{ printf "%.4g" $value }} samples/s with timestamps arriving out of order. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusoutofordertimestamps + summary: Prometheus drops samples with out-of-order timestamps. + expr: rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus-community-kube-prometheus",namespace="vynil-monitor"}[5m]) > 0 + for: 10m + labels: + severity: warning + - alert: PrometheusRemoteStorageFailures + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} failed to send {{ printf "%.1f" $value }}% of the samples to {{ $labels.remote_name}}:{{ $labels.url }} + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotestoragefailures + summary: Prometheus fails to send samples to remote storage. + expr: |- + ( + (rate(prometheus_remote_storage_failed_samples_total{job="prometheus-community-kube-prometheus",namespace="vynil-monitor"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus-community-kube-prometheus",namespace="vynil-monitor"}[5m])) + / + ( + (rate(prometheus_remote_storage_failed_samples_total{job="prometheus-community-kube-prometheus",namespace="vynil-monitor"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus-community-kube-prometheus",namespace="vynil-monitor"}[5m])) + + + (rate(prometheus_remote_storage_succeeded_samples_total{job="prometheus-community-kube-prometheus",namespace="vynil-monitor"}[5m]) or rate(prometheus_remote_storage_samples_total{job="prometheus-community-kube-prometheus",namespace="vynil-monitor"}[5m])) + ) + ) + * 100 + > 1 + for: 15m + labels: + severity: critical + - alert: PrometheusRemoteWriteBehind + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write is {{ printf "%.1f" $value }}s behind for {{ $labels.remote_name}}:{{ $labels.url }}. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotewritebehind + summary: Prometheus remote write is behind. + expr: |- + # Without max_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + ( + max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="prometheus-community-kube-prometheus",namespace="vynil-monitor"}[5m]) + - ignoring(remote_name, url) group_right + max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="prometheus-community-kube-prometheus",namespace="vynil-monitor"}[5m]) + ) + > 120 + for: 15m + labels: + severity: critical + - alert: PrometheusRemoteWriteDesiredShards + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write desired shards calculation wants to run {{ $value }} shards for queue {{ $labels.remote_name}}:{{ $labels.url }}, which is more than the max of {{ printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-community-kube-prometheus",namespace="vynil-monitor"}` $labels.instance | query | first | value }}. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotewritedesiredshards + summary: Prometheus remote write desired shards calculation wants to run more than configured max shards. + expr: |- + # Without max_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + ( + max_over_time(prometheus_remote_storage_shards_desired{job="prometheus-community-kube-prometheus",namespace="vynil-monitor"}[5m]) + > + max_over_time(prometheus_remote_storage_shards_max{job="prometheus-community-kube-prometheus",namespace="vynil-monitor"}[5m]) + ) + for: 15m + labels: + severity: warning + - alert: PrometheusRuleFailures + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to evaluate {{ printf "%.0f" $value }} rules in the last 5m. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusrulefailures + summary: Prometheus is failing rule evaluations. + expr: increase(prometheus_rule_evaluation_failures_total{job="prometheus-community-kube-prometheus",namespace="vynil-monitor"}[5m]) > 0 + for: 15m + labels: + severity: critical + - alert: PrometheusMissingRuleEvaluations + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has missed {{ printf "%.0f" $value }} rule group evaluations in the last 5m. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusmissingruleevaluations + summary: Prometheus is missing rule evaluations due to slow rule group evaluation. + expr: increase(prometheus_rule_group_iterations_missed_total{job="prometheus-community-kube-prometheus",namespace="vynil-monitor"}[5m]) > 0 + for: 15m + labels: + severity: warning + - alert: PrometheusTargetLimitHit + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped {{ printf "%.0f" $value }} targets because the number of targets exceeded the configured target_limit. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustargetlimithit + summary: Prometheus has dropped targets because some scrape configs have exceeded the targets limit. + expr: increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="prometheus-community-kube-prometheus",namespace="vynil-monitor"}[5m]) > 0 + for: 15m + labels: + severity: warning + - alert: PrometheusLabelLimitHit + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped {{ printf "%.0f" $value }} targets because some samples exceeded the configured label_limit, label_name_length_limit or label_value_length_limit. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuslabellimithit + summary: Prometheus has dropped targets because some scrape configs have exceeded the labels limit. + expr: increase(prometheus_target_scrape_pool_exceeded_label_limits_total{job="prometheus-community-kube-prometheus",namespace="vynil-monitor"}[5m]) > 0 + for: 15m + labels: + severity: warning + - alert: PrometheusScrapeBodySizeLimitHit + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed {{ printf "%.0f" $value }} scrapes in the last 5m because some targets exceeded the configured body_size_limit. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusscrapebodysizelimithit + summary: Prometheus has dropped some targets that exceeded body size limit. + expr: increase(prometheus_target_scrapes_exceeded_body_size_limit_total{job="prometheus-community-kube-prometheus",namespace="vynil-monitor"}[5m]) > 0 + for: 15m + labels: + severity: warning + - alert: PrometheusScrapeSampleLimitHit + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed {{ printf "%.0f" $value }} scrapes in the last 5m because some targets exceeded the configured sample_limit. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusscrapesamplelimithit + summary: Prometheus has failed scrapes that have exceeded the configured sample limit. + expr: increase(prometheus_target_scrapes_exceeded_sample_limit_total{job="prometheus-community-kube-prometheus",namespace="vynil-monitor"}[5m]) > 0 + for: 15m + labels: + severity: warning + - alert: PrometheusTargetSyncFailure + annotations: + description: '{{ printf "%.0f" $value }} targets in Prometheus {{$labels.namespace}}/{{$labels.pod}} have failed to sync because invalid configuration was supplied.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustargetsyncfailure + summary: Prometheus has failed to sync targets. + expr: increase(prometheus_target_sync_failed_total{job="prometheus-community-kube-prometheus",namespace="vynil-monitor"}[30m]) > 0 + for: 5m + labels: + severity: critical + - alert: PrometheusHighQueryLoad + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} query API has less than 20% available capacity in its query engine for the last 15 minutes. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheushighqueryload + summary: Prometheus is reaching its maximum capacity serving concurrent requests. + expr: avg_over_time(prometheus_engine_queries{job="prometheus-community-kube-prometheus",namespace="vynil-monitor"}[5m]) / max_over_time(prometheus_engine_queries_concurrent_max{job="prometheus-community-kube-prometheus",namespace="vynil-monitor"}[5m]) > 0.8 + for: 15m + labels: + severity: warning + - alert: PrometheusErrorSendingAlertsToAnyAlertmanager + annotations: + description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuserrorsendingalertstoanyalertmanager + summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager. + expr: |- + min without (alertmanager) ( + rate(prometheus_notifications_errors_total{job="prometheus-community-kube-prometheus",namespace="vynil-monitor",alertmanager!~``}[5m]) + / + rate(prometheus_notifications_sent_total{job="prometheus-community-kube-prometheus",namespace="vynil-monitor",alertmanager!~``}[5m]) + ) + * 100 + > 3 + for: 15m + labels: + severity: critical \ No newline at end of file diff --git a/monitor/prometheus/monitoring.coreos.com_v1_Prometheus_prometheus-community-kube-prometheus.yaml b/monitor/prometheus/monitoring.coreos.com_v1_Prometheus_prometheus-community-kube-prometheus.yaml new file mode 100644 index 0000000..4158693 --- /dev/null +++ b/monitor/prometheus/monitoring.coreos.com_v1_Prometheus_prometheus-community-kube-prometheus.yaml @@ -0,0 +1,67 @@ +# Source: kube-prometheus-stack/templates/prometheus/prometheus.yaml +apiVersion: monitoring.coreos.com/v1 +kind: Prometheus +metadata: + name: prometheus-community-kube-prometheus + namespace: vynil-monitor + labels: + app: kube-prometheus-stack-prometheus + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + image: "quay.io/prometheus/prometheus:v2.49.1" + version: v2.49.1 + externalUrl: http://prometheus-community-kube-prometheus.vynil-monitor:9090 + paused: false + replicas: 1 + shards: 1 + logLevel: info + logFormat: logfmt + listenLocal: false + enableAdminAPI: false + retention: "10d" + tsdb: + outOfOrderTimeWindow: 0s + walCompression: true + routePrefix: "/" + serviceAccountName: prometheus-community-kube-prometheus + serviceMonitorSelector: + matchLabels: + release: "prometheus-community" + + serviceMonitorNamespaceSelector: {} + podMonitorSelector: + matchLabels: + release: "prometheus-community" + + podMonitorNamespaceSelector: {} + probeSelector: + matchLabels: + release: "prometheus-community" + + probeNamespaceSelector: {} + securityContext: + fsGroup: 2000 + runAsGroup: 2000 + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + ruleNamespaceSelector: {} + ruleSelector: + matchLabels: + release: "prometheus-community" + + scrapeConfigSelector: + matchLabels: + release: "prometheus-community" + + scrapeConfigNamespaceSelector: {} + portName: http-web + hostNetwork: false \ No newline at end of file diff --git a/monitor/prometheus/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-coredns.yaml b/monitor/prometheus/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-coredns.yaml new file mode 100644 index 0000000..367d5f4 --- /dev/null +++ b/monitor/prometheus/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-coredns.yaml @@ -0,0 +1,29 @@ +# Source: kube-prometheus-stack/templates/exporters/core-dns/servicemonitor.yaml +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: prometheus-community-kube-coredns + namespace: vynil-monitor + labels: + app: kube-prometheus-stack-coredns + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + jobLabel: jobLabel + + selector: + matchLabels: + app: kube-prometheus-stack-coredns + release: "prometheus-community" + namespaceSelector: + matchNames: + - "kube-system" + endpoints: + - port: http-metrics + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token \ No newline at end of file diff --git a/monitor/prometheus/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-kube-proxy.yaml b/monitor/prometheus/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-kube-proxy.yaml new file mode 100644 index 0000000..f41c003 --- /dev/null +++ b/monitor/prometheus/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-kube-proxy.yaml @@ -0,0 +1,29 @@ +# Source: kube-prometheus-stack/templates/exporters/kube-proxy/servicemonitor.yaml +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: prometheus-community-kube-kube-proxy + namespace: vynil-monitor + labels: + app: kube-prometheus-stack-kube-proxy + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + jobLabel: jobLabel + + selector: + matchLabels: + app: kube-prometheus-stack-kube-proxy + release: "prometheus-community" + namespaceSelector: + matchNames: + - "kube-system" + endpoints: + - port: http-metrics + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token \ No newline at end of file diff --git a/monitor/prometheus/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-kubelet.yaml b/monitor/prometheus/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-kubelet.yaml new file mode 100644 index 0000000..adc1f8a --- /dev/null +++ b/monitor/prometheus/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-kubelet.yaml @@ -0,0 +1,95 @@ +# Source: kube-prometheus-stack/templates/exporters/kubelet/servicemonitor.yaml +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: prometheus-community-kube-kubelet + namespace: vynil-monitor + labels: + app: kube-prometheus-stack-kubelet + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + + attachMetadata: + node: false + endpoints: + - port: https-metrics + scheme: https + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecureSkipVerify: true + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + honorLabels: true + honorTimestamps: true + relabelings: + - action: replace + sourceLabels: + - __metrics_path__ + targetLabel: metrics_path + - port: https-metrics + scheme: https + path: /metrics/cadvisor + honorLabels: true + honorTimestamps: true + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecureSkipVerify: true + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + metricRelabelings: + - action: drop + regex: container_cpu_(cfs_throttled_seconds_total|load_average_10s|system_seconds_total|user_seconds_total) + sourceLabels: + - __name__ + - action: drop + regex: container_fs_(io_current|io_time_seconds_total|io_time_weighted_seconds_total|reads_merged_total|sector_reads_total|sector_writes_total|writes_merged_total) + sourceLabels: + - __name__ + - action: drop + regex: container_memory_(mapped_file|swap) + sourceLabels: + - __name__ + - action: drop + regex: container_(file_descriptors|tasks_state|threads_max) + sourceLabels: + - __name__ + - action: drop + regex: container_spec.* + sourceLabels: + - __name__ + - action: drop + regex: .+; + sourceLabels: + - id + - pod + relabelings: + - action: replace + sourceLabels: + - __metrics_path__ + targetLabel: metrics_path + - port: https-metrics + scheme: https + path: /metrics/probes + honorLabels: true + honorTimestamps: true + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecureSkipVerify: true + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + relabelings: + - action: replace + sourceLabels: + - __metrics_path__ + targetLabel: metrics_path + jobLabel: k8s-app + namespaceSelector: + matchNames: + - kube-system + selector: + matchLabels: + app.kubernetes.io/name: kubelet + k8s-app: kubelet \ No newline at end of file diff --git a/monitor/prometheus/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-prometheus.yaml b/monitor/prometheus/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-prometheus.yaml new file mode 100644 index 0000000..e6497a6 --- /dev/null +++ b/monitor/prometheus/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-prometheus.yaml @@ -0,0 +1,32 @@ +# Source: kube-prometheus-stack/templates/prometheus/servicemonitor.yaml +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: prometheus-community-kube-prometheus + namespace: vynil-monitor + labels: + app: kube-prometheus-stack-prometheus + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + + selector: + matchLabels: + app: kube-prometheus-stack-prometheus + release: "prometheus-community" + self-monitor: "true" + namespaceSelector: + matchNames: + - "vynil-monitor" + endpoints: + - port: http-web + path: "/metrics" + - port: reloader-web + scheme: http + path: "/metrics" \ No newline at end of file diff --git a/monitor/prometheus/presentation.tf b/monitor/prometheus/presentation.tf new file mode 100644 index 0000000..8063a28 --- /dev/null +++ b/monitor/prometheus/presentation.tf @@ -0,0 +1,75 @@ +locals { + dns-name = "${var.sub-domain}.${var.domain-name}" + dns-names = [local.dns-name] + app-name = var.component == var.instance ? var.instance : format("%s-%s", var.component, var.instance) + icon = "icon.svg" + request_headers = { + "Content-Type" = "application/json" + Authorization = "Bearer ${data.kubernetes_secret_v1.authentik.data["AUTHENTIK_BOOTSTRAP_TOKEN"]}" + } + service = { + "name" = "prometheus-community-kube-prometheus" + "port" = { + "number" = 9090 + } + } +} + +module "ingress" { + source = "git::https://git.solidite.fr/vynil/kydah-modules.git//ingress" + component = "" + instance = var.instance + namespace = var.namespace + issuer = var.issuer + ingress_class = var.ingress-class + labels = local.common-labels + dns_names = local.dns-names + middlewares = ["forward-${local.app-name}"] + services = [local.service] + providers = { + kubectl = kubectl + } +} + +module "application" { + source = "git::https://git.solidite.fr/vynil/kydah-modules.git//application" + component = var.component + instance = var.instance + app_group = var.app-group + dns_name = local.dns-name + icon = local.icon + protocol_provider = module.forward.provider-id + providers = { + authentik = authentik + } +} + +provider "restapi" { + uri = "http://authentik.${var.domain}-auth.svc/api/v3/" + headers = local.request_headers + create_method = "PATCH" + update_method = "PATCH" + destroy_method = "PATCH" + write_returns_object = true + id_attribute = "name" +} + +module "forward" { + source = "git::https://git.solidite.fr/vynil/kydah-modules.git//forward" + component = var.component + instance = var.instance + domain = var.domain + namespace = var.namespace + ingress_class = var.ingress-class + labels = local.common-labels + dns_names = local.dns-names + service = local.service + icon = local.icon + request_headers = local.request_headers + providers = { + restapi = restapi + http = http + kubectl = kubectl + authentik = authentik + } +} diff --git a/monitor/prometheus/rbac.authorization.k8s.io_v1_ClusterRoleBinding_prometheus-community-kube-prometheus.yaml b/monitor/prometheus/rbac.authorization.k8s.io_v1_ClusterRoleBinding_prometheus-community-kube-prometheus.yaml new file mode 100644 index 0000000..d4d8d74 --- /dev/null +++ b/monitor/prometheus/rbac.authorization.k8s.io_v1_ClusterRoleBinding_prometheus-community-kube-prometheus.yaml @@ -0,0 +1,23 @@ +# Source: kube-prometheus-stack/templates/prometheus/clusterrolebinding.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: prometheus-community-kube-prometheus + labels: + app: kube-prometheus-stack-prometheus + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: prometheus-community-kube-prometheus +subjects: + - kind: ServiceAccount + name: prometheus-community-kube-prometheus + namespace: vynil-monitor \ No newline at end of file diff --git a/monitor/prometheus/rbac.authorization.k8s.io_v1_ClusterRole_prometheus-community-kube-prometheus.yaml b/monitor/prometheus/rbac.authorization.k8s.io_v1_ClusterRole_prometheus-community-kube-prometheus.yaml new file mode 100644 index 0000000..2a1e90f --- /dev/null +++ b/monitor/prometheus/rbac.authorization.k8s.io_v1_ClusterRole_prometheus-community-kube-prometheus.yaml @@ -0,0 +1,33 @@ +# Source: kube-prometheus-stack/templates/prometheus/clusterrole.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: prometheus-community-kube-prometheus + labels: + app: kube-prometheus-stack-prometheus + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +rules: +# This permission are not in the kube-prometheus repo +# they're grabbed from https://github.com/prometheus/prometheus/blob/master/documentation/examples/rbac-setup.yml +- apiGroups: [""] + resources: + - nodes + - nodes/metrics + - services + - endpoints + - pods + verbs: ["get", "list", "watch"] +- apiGroups: + - "networking.k8s.io" + resources: + - ingresses + verbs: ["get", "list", "watch"] +- nonResourceURLs: ["/metrics", "/metrics/cadvisor"] + verbs: ["get"] \ No newline at end of file diff --git a/monitor/prometheus/ressources_no_ns.tf b/monitor/prometheus/ressources_no_ns.tf new file mode 100644 index 0000000..9fa58b7 --- /dev/null +++ b/monitor/prometheus/ressources_no_ns.tf @@ -0,0 +1,45 @@ + +# first loop through resources in ids_prio[0] +resource "kustomization_resource" "pre_no_ns" { + for_each = data.kustomization_overlay.data_no_ns.ids_prio[0] + + manifest = ( + contains(["_/Secret"], regex("(?P.*/.*)/.*/.*", each.value)["group_kind"]) + ? sensitive(data.kustomization_overlay.data_no_ns.manifests[each.value]) + : data.kustomization_overlay.data_no_ns.manifests[each.value] + ) +} + +# then loop through resources in ids_prio[1] +# and set an explicit depends_on on kustomization_resource.pre +# wait 2 minutes for any deployment or daemonset to become ready +resource "kustomization_resource" "main_no_ns" { + for_each = data.kustomization_overlay.data_no_ns.ids_prio[1] + + manifest = ( + contains(["_/Secret"], regex("(?P.*/.*)/.*/.*", each.value)["group_kind"]) + ? sensitive(data.kustomization_overlay.data_no_ns.manifests[each.value]) + : data.kustomization_overlay.data_no_ns.manifests[each.value] + ) + wait = true + timeouts { + create = "5m" + update = "5m" + } + + depends_on = [kustomization_resource.pre_no_ns] +} + +# finally, loop through resources in ids_prio[2] +# and set an explicit depends_on on kustomization_resource.main +resource "kustomization_resource" "post_no_ns" { + for_each = data.kustomization_overlay.data_no_ns.ids_prio[2] + + manifest = ( + contains(["_/Secret"], regex("(?P.*/.*)/.*/.*", each.value)["group_kind"]) + ? sensitive(data.kustomization_overlay.data_no_ns.manifests[each.value]) + : data.kustomization_overlay.data_no_ns.manifests[each.value] + ) + + depends_on = [kustomization_resource.main_no_ns] +} diff --git a/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-alertmanager-overview.yaml b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-alertmanager-overview.yaml new file mode 100644 index 0000000..4703b52 --- /dev/null +++ b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-alertmanager-overview.yaml @@ -0,0 +1,22 @@ +# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/alertmanager-overview.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: vynil-monitor + name: prometheus-community-kube-alertmanager-overview + annotations: + {} + labels: + grafana_dashboard: "1" + app: kube-prometheus-stack-grafana + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +data: + alertmanager-overview.json: |- + {"__inputs":[],"__requires":[],"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":1,"hideControls":false,"id":null,"links":[],"refresh":"30s","rows":[{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","description":"current set of alerts stored in the Alertmanager","fill":1,"fillGradient":0,"gridPos":{},"id":2,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":false,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(alertmanager_alerts{namespace=~\"$namespace\",service=~\"$service\"}) by (namespace,service,instance)","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Alerts","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"none","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"none","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","description":"rate of successful and invalid alerts received by the Alertmanager","fill":1,"fillGradient":0,"gridPos":{},"id":3,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":false,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(rate(alertmanager_alerts_received_total{namespace=~\"$namespace\",service=~\"$service\"}[$__rate_interval])) by (namespace,service,instance)","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} Received","refId":"A"},{"expr":"sum(rate(alertmanager_alerts_invalid_total{namespace=~\"$namespace\",service=~\"$service\"}[$__rate_interval])) by (namespace,service,instance)","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} Invalid","refId":"B"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Alerts receive rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Alerts","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","description":"rate of successful and invalid notifications sent by the Alertmanager","fill":1,"fillGradient":0,"gridPos":{},"id":4,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":false,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":"integration","seriesOverrides":[],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(rate(alertmanager_notifications_total{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (integration,namespace,service,instance)","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} Total","refId":"A"},{"expr":"sum(rate(alertmanager_notifications_failed_total{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (integration,namespace,service,instance)","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} Failed","refId":"B"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"$integration: Notifications Send Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","description":"latency of notifications sent by the Alertmanager","fill":1,"fillGradient":0,"gridPos":{},"id":5,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":false,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":"integration","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99,\n sum(rate(alertmanager_notification_latency_seconds_bucket{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (le,namespace,service,instance)\n) \n","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} 99th Percentile","refId":"A"},{"expr":"histogram_quantile(0.50,\n sum(rate(alertmanager_notification_latency_seconds_bucket{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (le,namespace,service,instance)\n) \n","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} Median","refId":"B"},{"expr":"sum(rate(alertmanager_notification_latency_seconds_sum{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (namespace,service,instance)\n/\nsum(rate(alertmanager_notification_latency_seconds_count{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (namespace,service,instance)\n","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} Average","refId":"C"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"$integration: Notification Duration","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Notifications","titleSize":"h6","type":"row"}],"schemaVersion":14,"style":"dark","tags":["alertmanager-mixin"],"templating":{"list":[{"current":{"text":"Prometheus","value":"Prometheus"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"","value":""},"datasource":"$datasource","hide":0,"includeAll":false,"label":"namespace","multi":false,"name":"namespace","options":[],"query":"label_values(alertmanager_alerts, namespace)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"","value":""},"datasource":"$datasource","hide":0,"includeAll":false,"label":"service","multi":false,"name":"service","options":[],"query":"label_values(alertmanager_alerts, service)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"all","value":"$__all"},"datasource":"$datasource","hide":2,"includeAll":true,"label":null,"multi":false,"name":"integration","options":[],"query":"label_values(alertmanager_notifications_total{integration=~\".*\"}, integration)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Alertmanager / Overview","uid":"alertmanager-overview","version":0} \ No newline at end of file diff --git a/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-cluster-total.yaml b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-cluster-total.yaml new file mode 100644 index 0000000..b9f723d --- /dev/null +++ b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-cluster-total.yaml @@ -0,0 +1,22 @@ +# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/cluster-total.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: vynil-monitor + name: prometheus-community-kube-cluster-total + annotations: + {} + labels: + grafana_dashboard: "1" + app: kube-prometheus-stack-grafana + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +data: + cluster-total.json: |- + {"__inputs":[],"__requires":[],"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"id":null,"links":[],"panels":[{"collapse":false,"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":2,"panels":[],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Current Bandwidth","titleSize":"h6","type":"row"},{"aliasColors":{},"bars":true,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":0,"y":1},"id":3,"legend":{"alignAsTable":true,"avg":false,"current":true,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":false,"linewidth":1,"links":[],"minSpan":24,"nullPointMode":"null","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":false,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"time_series","intervalFactor":1,"legendFormat":"{{namespace}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Current Rate of Bytes Received","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"series","name":null,"show":false,"values":["current"]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":true,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":12,"y":1},"id":4,"legend":{"alignAsTable":true,"avg":false,"current":true,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":false,"linewidth":1,"links":[],"minSpan":24,"nullPointMode":"null","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":false,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"time_series","intervalFactor":1,"legendFormat":"{{namespace}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Current Rate of Bytes Transmitted","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"series","name":null,"show":false,"values":["current"]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"columns":[{"text":"Time","value":"Time"},{"text":"Value #A","value":"Value #A"},{"text":"Value #B","value":"Value #B"},{"text":"Value #C","value":"Value #C"},{"text":"Value #D","value":"Value #D"},{"text":"Value #E","value":"Value #E"},{"text":"Value #F","value":"Value #F"},{"text":"Value #G","value":"Value #G"},{"text":"Value #H","value":"Value #H"},{"text":"namespace","value":"namespace"}],"datasource":"$datasource","fill":1,"fontSize":"90%","gridPos":{"h":9,"w":24,"x":0,"y":10},"id":5,"lines":true,"linewidth":1,"links":[],"minSpan":24,"nullPointMode":"null as zero","renderer":"flot","scroll":true,"showHeader":true,"sort":{"col":0,"desc":false},"spaceLength":10,"span":24,"styles":[{"alias":"Time","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Time","thresholds":[],"type":"hidden","unit":"short"},{"alias":"Current Bandwidth Received","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Current Bandwidth Transmitted","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Average Bandwidth Received","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Average Bandwidth Transmitted","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Rate of Received Packets","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Transmitted Packets","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #F","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Received Packets Dropped","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #G","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Transmitted Packets Dropped","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #H","thresholds":[],"type":"number","unit":"pps"},{"alias":"Namespace","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTooltip":"Drill down","linkUrl":"d/8b7a8b326d7a6f1f04244066368c67af/kubernetes-networking-namespace-pods?orgId=1&refresh=30s&var-namespace=${__value.text}","pattern":"namespace","thresholds":[],"type":"number","unit":"short"}],"targets":[{"expr":"sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"A","step":10},{"expr":"sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"B","step":10},{"expr":"sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"C","step":10},{"expr":"sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"D","step":10},{"expr":"sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"E","step":10},{"expr":"sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"F","step":10},{"expr":"sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"G","step":10},{"expr":"sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"H","step":10}],"timeFrom":null,"timeShift":null,"title":"Current Status","type":"table"},{"collapse":true,"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":10},"id":6,"panels":[{"aliasColors":{},"bars":true,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":0,"y":11},"id":7,"legend":{"alignAsTable":true,"avg":false,"current":true,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":false,"linewidth":1,"links":[],"minSpan":24,"nullPointMode":"null","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":false,"steppedLine":false,"targets":[{"expr":"sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"time_series","intervalFactor":1,"legendFormat":"{{namespace}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Average Rate of Bytes Received","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"series","name":null,"show":false,"values":["current"]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":true,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":12,"y":11},"id":8,"legend":{"alignAsTable":true,"avg":false,"current":true,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":false,"linewidth":1,"links":[],"minSpan":24,"nullPointMode":"null","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":false,"steppedLine":false,"targets":[{"expr":"sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"time_series","intervalFactor":1,"legendFormat":"{{namespace}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Average Rate of Bytes Transmitted","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"series","name":null,"show":false,"values":["current"]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Average Bandwidth","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":11},"id":9,"panels":[],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Bandwidth History","titleSize":"h6","type":"row"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":24,"x":0,"y":12},"id":10,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":true,"hideZero":true,"max":true,"min":true,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"minSpan":24,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"time_series","intervalFactor":1,"legendFormat":"{{namespace}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Receive Bandwidth","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":24,"x":0,"y":21},"id":11,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":true,"hideZero":true,"max":true,"min":true,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"minSpan":24,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"time_series","intervalFactor":1,"legendFormat":"{{namespace}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Transmit Bandwidth","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"collapse":true,"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":30},"id":12,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":24,"x":0,"y":31},"id":13,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":true,"hideZero":true,"max":true,"min":true,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"minSpan":24,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"time_series","intervalFactor":1,"legendFormat":"{{namespace}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":24,"x":0,"y":40},"id":14,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":true,"hideZero":true,"max":true,"min":true,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"minSpan":24,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"time_series","intervalFactor":1,"legendFormat":"{{namespace}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Packets","titleSize":"h6","type":"row"},{"collapse":true,"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":31},"id":15,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":24,"x":0,"y":50},"id":16,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":true,"hideZero":true,"max":true,"min":true,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"minSpan":24,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"time_series","intervalFactor":1,"legendFormat":"{{namespace}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets Dropped","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":24,"x":0,"y":59},"id":17,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":true,"hideZero":true,"max":true,"min":true,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"minSpan":24,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"time_series","intervalFactor":1,"legendFormat":"{{namespace}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets Dropped","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":24,"x":0,"y":59},"id":18,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":true,"hideZero":true,"max":true,"min":true,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":2,"links":[{"targetBlank":true,"title":"What is TCP Retransmit?","url":"https://accedian.com/enterprises/blog/network-packet-loss-retransmissions-and-duplicate-acknowledgements/"}],"minSpan":24,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(rate(node_netstat_Tcp_RetransSegs{cluster=\"$cluster\"}[$interval:$resolution]) / rate(node_netstat_Tcp_OutSegs{cluster=\"$cluster\"}[$interval:$resolution])) by (instance))","format":"time_series","intervalFactor":1,"legendFormat":"{{instance}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of TCP Retransmits out of all sent segments","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"percentunit","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":24,"x":0,"y":59},"id":19,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":true,"hideZero":true,"max":true,"min":true,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":2,"links":[{"targetBlank":true,"title":"Why monitor SYN retransmits?","url":"https://github.com/prometheus/node_exporter/issues/1023#issuecomment-408128365"}],"minSpan":24,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(rate(node_netstat_TcpExt_TCPSynRetrans{cluster=\"$cluster\"}[$interval:$resolution]) / rate(node_netstat_Tcp_RetransSegs{cluster=\"$cluster\"}[$interval:$resolution])) by (instance))","format":"time_series","intervalFactor":1,"legendFormat":"{{instance}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of TCP SYN Retransmits out of all retransmits","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"percentunit","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Errors","titleSize":"h6","type":"row"}],"refresh":"10s","rows":[],"schemaVersion":18,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"allValue":null,"auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"5m","value":"5m"},"datasource":"$datasource","hide":0,"includeAll":false,"label":null,"multi":false,"name":"resolution","options":[{"selected":false,"text":"30s","value":"30s"},{"selected":true,"text":"5m","value":"5m"},{"selected":false,"text":"1h","value":"1h"}],"query":"30s,5m,1h","refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"interval","useTags":false},{"allValue":null,"auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"5m","value":"5m"},"datasource":"$datasource","hide":2,"includeAll":false,"label":null,"multi":false,"name":"interval","options":[{"selected":true,"text":"4h","value":"4h"}],"query":"4h","refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"interval","useTags":false},{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{},"datasource":"$datasource","hide":2,"includeAll":false,"label":null,"multi":false,"name":"cluster","options":[],"query":"label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)","refresh":2,"regex":"","sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Kubernetes / Networking / Cluster","uid":"ff635a025bcfea7bc3dd4f508990a3e9","version":0} \ No newline at end of file diff --git a/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-grafana-datasource.yaml b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-grafana-datasource.yaml new file mode 100644 index 0000000..9a9d25c --- /dev/null +++ b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-grafana-datasource.yaml @@ -0,0 +1,38 @@ +# Source: kube-prometheus-stack/templates/grafana/configmaps-datasources.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: prometheus-community-kube-grafana-datasource + namespace: vynil-monitor + labels: + grafana_datasource: "1" + app: kube-prometheus-stack-grafana + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +data: + datasource.yaml: |- + apiVersion: 1 + datasources: + - name: Prometheus + type: prometheus + uid: prometheus + url: http://prometheus-community-kube-prometheus.vynil-monitor:9090/ + access: proxy + isDefault: true + jsonData: + httpMethod: POST + timeInterval: 30s + - name: Alertmanager + type: alertmanager + uid: alertmanager + url: http://prometheus-community-kube-alertmanager.vynil-monitor:9093/ + access: proxy + jsonData: + handleGrafanaManagedAlerts: false + implementation: prometheus \ No newline at end of file diff --git a/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-grafana-overview.yaml b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-grafana-overview.yaml new file mode 100644 index 0000000..4dec03e --- /dev/null +++ b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-grafana-overview.yaml @@ -0,0 +1,22 @@ +# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/grafana-overview.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: vynil-monitor + name: prometheus-community-kube-grafana-overview + annotations: + {} + labels: + grafana_dashboard: "1" + app: kube-prometheus-stack-grafana + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +data: + grafana-overview.json: |- + {"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","target":{"limit":100,"matchAny":false,"tags":[],"type":"dashboard"},"type":"dashboard"}]},"editable":true,"gnetId":null,"graphTooltip":0,"id":3085,"iteration":1631554945276,"links":[],"panels":[{"datasource":"$datasource","fieldConfig":{"defaults":{"mappings":[],"noValue":"0","thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":5,"w":6,"x":0,"y":0},"id":6,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"text":{},"textMode":"auto"},"pluginVersion":"8.1.3","targets":[{"expr":"grafana_alerting_result_total{job=~\"$job\", instance=~\"$instance\", state=\"alerting\"}","instant":true,"interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Firing Alerts","type":"stat"},{"datasource":"$datasource","fieldConfig":{"defaults":{"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":5,"w":6,"x":6,"y":0},"id":8,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"text":{},"textMode":"auto"},"pluginVersion":"8.1.3","targets":[{"expr":"sum(grafana_stat_totals_dashboard{job=~\"$job\", instance=~\"$instance\"})","interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Dashboards","type":"stat"},{"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{"align":null,"displayMode":"auto"},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":5,"w":12,"x":12,"y":0},"id":10,"options":{"showHeader":true},"pluginVersion":"8.1.3","targets":[{"expr":"grafana_build_info{job=~\"$job\", instance=~\"$instance\"}","instant":true,"interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Build Info","transformations":[{"id":"labelsToFields","options":{}},{"id":"organize","options":{"excludeByName":{"Time":true,"Value":true,"branch":true,"container":true,"goversion":true,"namespace":true,"pod":true,"revision":true},"indexByName":{"Time":7,"Value":11,"branch":4,"container":8,"edition":2,"goversion":6,"instance":1,"job":0,"namespace":9,"pod":10,"revision":5,"version":3},"renameByName":{}}}],"type":"table"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"links":[]},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":8,"w":12,"x":0,"y":5},"hiddenSeries":false,"id":2,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"alertThreshold":true},"percentage":false,"pluginVersion":"8.1.3","pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by (status_code) (irate(grafana_http_request_duration_seconds_count{job=~\"$job\", instance=~\"$instance\"}[1m])) ","interval":"","legendFormat":"{{status_code}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"RPS","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"$$hashKey":"object:157","format":"reqps","label":null,"logBase":1,"max":null,"min":null,"show":true},{"$$hashKey":"object:158","format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"links":[]},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":8,"w":12,"x":12,"y":5},"hiddenSeries":false,"id":4,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"alertThreshold":true},"percentage":false,"pluginVersion":"8.1.3","pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"exemplar":true,"expr":"histogram_quantile(0.99, sum(irate(grafana_http_request_duration_seconds_bucket{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])) by (le)) * 1","interval":"","legendFormat":"99th Percentile","refId":"A"},{"exemplar":true,"expr":"histogram_quantile(0.50, sum(irate(grafana_http_request_duration_seconds_bucket{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])) by (le)) * 1","interval":"","legendFormat":"50th Percentile","refId":"B"},{"exemplar":true,"expr":"sum(irate(grafana_http_request_duration_seconds_sum{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])) * 1 / sum(irate(grafana_http_request_duration_seconds_count{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval]))","interval":"","legendFormat":"Average","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Request Latency","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"$$hashKey":"object:210","format":"ms","label":null,"logBase":1,"max":null,"min":null,"show":true},{"$$hashKey":"object:211","format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"schemaVersion":30,"style":"dark","tags":[],"templating":{"list":[{"current":{"selected":true,"text":"dev-cortex","value":"dev-cortex"},"description":null,"error":null,"hide":0,"includeAll":false,"label":null,"multi":false,"name":"datasource","options":[],"query":"prometheus","queryValue":"","refresh":1,"regex":"","skipUrlSync":false,"type":"datasource"},{"allValue":".*","current":{"selected":false,"text":["default/grafana"],"value":["default/grafana"]},"datasource":"$datasource","definition":"label_values(grafana_build_info, job)","description":null,"error":null,"hide":0,"includeAll":true,"label":null,"multi":true,"name":"job","options":[],"query":{"query":"label_values(grafana_build_info, job)","refId":"Billing Admin-job-Variable-Query"},"refresh":1,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":"","tagsQuery":"","type":"query","useTags":false},{"allValue":".*","current":{"selected":false,"text":"All","value":"$__all"},"datasource":"$datasource","definition":"label_values(grafana_build_info, instance)","description":null,"error":null,"hide":0,"includeAll":true,"label":null,"multi":true,"name":"instance","options":[],"query":{"query":"label_values(grafana_build_info, instance)","refId":"Billing Admin-instance-Variable-Query"},"refresh":1,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":"","tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-6h","to":"now"},"timepicker":{"refresh_intervals":["10s","30s","1m","5m","15m","30m","1h","2h","1d"]},"timezone": "Europe/Paris","title":"Grafana Overview","uid":"6be0s85Mk","version":2} \ No newline at end of file diff --git a/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-k8s-coredns.yaml b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-k8s-coredns.yaml new file mode 100644 index 0000000..10dbf0b --- /dev/null +++ b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-k8s-coredns.yaml @@ -0,0 +1,22 @@ +# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-coredns.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: vynil-monitor + name: prometheus-community-kube-k8s-coredns + annotations: + {} + labels: + grafana_dashboard: "1" + app: kube-prometheus-stack-grafana + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +data: + k8s-coredns.json: |- + {"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"description":"A dashboard for the CoreDNS DNS server with updated metrics for version 1.7.0+. Based on the CoreDNS dashboard by buhay.","editable":true,"gnetId":12539,"graphTooltip":0,"iteration":1603798405693,"links":[{"icon":"external link","tags":[],"targetBlank":true,"title":"CoreDNS.io","type":"link","url":"https://coredns.io"}],"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","editable":true,"error":false,"fieldConfig":{"defaults":{"custom":{},"links":[],"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"fill":1,"fillGradient":0,"grid":{},"gridPos":{"h":7,"w":8,"x":0,"y":0},"hiddenSeries":false,"id":2,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","options":{"alertThreshold":true},"percentage":false,"pluginVersion":"7.2.0","pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"total","yaxis":2}],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(rate(coredns_dns_request_count_total{job=~\"$job\",instance=~\"$instance\"}[5m])) by (proto) or\nsum(rate(coredns_dns_requests_total{job=~\"$job\",instance=~\"$instance\"}[5m])) by (proto)","format":"time_series","interval":"","intervalFactor":2,"legendFormat":"{{ proto }}","refId":"A","step":60}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Requests (total)","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","logBase":1,"max":null,"min":0,"show":true},{"format":"pps","logBase":1,"max":null,"min":0,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","editable":true,"error":false,"fieldConfig":{"defaults":{"custom":{},"links":[]},"overrides":[]},"fill":1,"fillGradient":0,"grid":{},"gridPos":{"h":7,"w":8,"x":8,"y":0},"hiddenSeries":false,"id":4,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","options":{"alertThreshold":true},"percentage":false,"pluginVersion":"7.2.0","pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"total","yaxis":2},{"alias":"other","yaxis":2}],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(rate(coredns_dns_request_type_count_total{job=~\"$job\",instance=~\"$instance\"}[5m])) by (type) or \nsum(rate(coredns_dns_requests_total{job=~\"$job\",instance=~\"$instance\"}[5m])) by (type)","interval":"","intervalFactor":2,"legendFormat":"{{ type }}","refId":"A","step":60}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Requests (by qtype)","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","logBase":1,"max":null,"min":0,"show":true},{"format":"pps","logBase":1,"max":null,"min":0,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","editable":true,"error":false,"fieldConfig":{"defaults":{"custom":{},"links":[]},"overrides":[]},"fill":1,"fillGradient":0,"grid":{},"gridPos":{"h":7,"w":8,"x":16,"y":0},"hiddenSeries":false,"id":6,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","options":{"alertThreshold":true},"percentage":false,"pluginVersion":"7.2.0","pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"total","yaxis":2}],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(rate(coredns_dns_request_count_total{job=~\"$job\",instance=~\"$instance\"}[5m])) by (zone) or\nsum(rate(coredns_dns_requests_total{job=~\"$job\",instance=~\"$instance\"}[5m])) by (zone)","interval":"","intervalFactor":2,"legendFormat":"{{ zone }}","refId":"A","step":60}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Requests (by zone)","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","logBase":1,"max":null,"min":0,"show":true},{"format":"pps","logBase":1,"max":null,"min":0,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","editable":true,"error":false,"fieldConfig":{"defaults":{"custom":{},"links":[]},"overrides":[]},"fill":1,"fillGradient":0,"grid":{},"gridPos":{"h":7,"w":12,"x":0,"y":7},"hiddenSeries":false,"id":8,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","options":{"alertThreshold":true},"percentage":false,"pluginVersion":"7.2.0","pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"total","yaxis":2}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(coredns_dns_request_do_count_total{job=~\"$job\",instance=~\"$instance\"}[5m])) or\nsum(rate(coredns_dns_do_requests_total{job=~\"$job\",instance=~\"$instance\"}[5m]))","interval":"","intervalFactor":2,"legendFormat":"DO","refId":"A","step":40},{"expr":"sum(rate(coredns_dns_request_count_total{job=~\"$job\",instance=~\"$instance\"}[5m])) or\nsum(rate(coredns_dns_requests_total{job=~\"$job\",instance=~\"$instance\"}[5m]))","interval":"","intervalFactor":2,"legendFormat":"total","refId":"B","step":40}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Requests (DO bit)","tooltip":{"shared":true,"sort":2,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","logBase":1,"max":null,"min":0,"show":true},{"format":"pps","logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","editable":true,"error":false,"fieldConfig":{"defaults":{"custom":{},"links":[]},"overrides":[]},"fill":1,"fillGradient":0,"grid":{},"gridPos":{"h":7,"w":6,"x":12,"y":7},"hiddenSeries":false,"id":10,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","options":{"alertThreshold":true},"percentage":false,"pluginVersion":"7.2.0","pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"tcp:90","yaxis":2},{"alias":"tcp:99 ","yaxis":2},{"alias":"tcp:50","yaxis":2}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(coredns_dns_request_size_bytes_bucket{job=~\"$job\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto))","interval":"","intervalFactor":2,"legendFormat":"{{ proto }}:99 ","refId":"A","step":60},{"expr":"histogram_quantile(0.90, sum(rate(coredns_dns_request_size_bytes_bucket{job=~\"$job\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto))","intervalFactor":2,"legendFormat":"{{ proto }}:90","refId":"B","step":60},{"expr":"histogram_quantile(0.50, sum(rate(coredns_dns_request_size_bytes_bucket{job=~\"$job\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto))","intervalFactor":2,"legendFormat":"{{ proto }}:50","refId":"C","step":60}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Requests (size, udp)","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","logBase":1,"max":null,"min":0,"show":true},{"format":"short","logBase":1,"max":null,"min":0,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","editable":true,"error":false,"fieldConfig":{"defaults":{"custom":{},"links":[]},"overrides":[]},"fill":1,"fillGradient":0,"grid":{},"gridPos":{"h":7,"w":6,"x":18,"y":7},"hiddenSeries":false,"id":12,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","options":{"alertThreshold":true},"percentage":false,"pluginVersion":"7.2.0","pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"tcp:90","yaxis":1},{"alias":"tcp:99 ","yaxis":1},{"alias":"tcp:50","yaxis":1}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(coredns_dns_request_size_bytes_bucket{job=~\"$job\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto))","format":"time_series","interval":"","intervalFactor":2,"legendFormat":"{{ proto }}:99 ","refId":"A","step":60},{"expr":"histogram_quantile(0.90, sum(rate(coredns_dns_request_size_bytes_bucket{job=~\"$job\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto))","format":"time_series","interval":"","intervalFactor":2,"legendFormat":"{{ proto }}:90","refId":"B","step":60},{"expr":"histogram_quantile(0.50, sum(rate(coredns_dns_request_size_bytes_bucket{job=~\"$job\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto))","format":"time_series","interval":"","intervalFactor":2,"legendFormat":"{{ proto }}:50","refId":"C","step":60}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Requests (size,tcp)","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","logBase":1,"max":null,"min":0,"show":true},{"format":"short","logBase":1,"max":null,"min":0,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","editable":true,"error":false,"fieldConfig":{"defaults":{"custom":{},"links":[]},"overrides":[]},"fill":1,"fillGradient":0,"grid":{},"gridPos":{"h":7,"w":12,"x":0,"y":14},"hiddenSeries":false,"id":14,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","options":{"alertThreshold":true},"percentage":false,"pluginVersion":"7.2.0","pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(rate(coredns_dns_response_rcode_count_total{job=~\"$job\",instance=~\"$instance\"}[5m])) by (rcode) or\nsum(rate(coredns_dns_responses_total{job=~\"$job\",instance=~\"$instance\"}[5m])) by (rcode)","interval":"","intervalFactor":2,"legendFormat":"{{ rcode }}","refId":"A","step":40}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Responses (by rcode)","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","logBase":1,"max":null,"min":0,"show":true},{"format":"short","logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","editable":true,"error":false,"fieldConfig":{"defaults":{"custom":{},"links":[]},"overrides":[]},"fill":1,"fillGradient":0,"grid":{},"gridPos":{"h":7,"w":12,"x":12,"y":14},"hiddenSeries":false,"id":32,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","options":{"alertThreshold":true},"percentage":false,"pluginVersion":"7.2.0","pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(coredns_dns_request_duration_seconds_bucket{job=~\"$job\",instance=~\"$instance\"}[5m])) by (le, job))","format":"time_series","intervalFactor":2,"legendFormat":"99%","refId":"A","step":40},{"expr":"histogram_quantile(0.90, sum(rate(coredns_dns_request_duration_seconds_bucket{job=~\"$job\",instance=~\"$instance\"}[5m])) by (le))","format":"time_series","intervalFactor":2,"legendFormat":"90%","refId":"B","step":40},{"expr":"histogram_quantile(0.50, sum(rate(coredns_dns_request_duration_seconds_bucket{job=~\"$job\",instance=~\"$instance\"}[5m])) by (le))","format":"time_series","intervalFactor":2,"legendFormat":"50%","refId":"C","step":40}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Responses (duration)","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","logBase":1,"max":null,"min":0,"show":true},{"format":"short","logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","editable":true,"error":false,"fieldConfig":{"defaults":{"custom":{},"links":[]},"overrides":[]},"fill":1,"fillGradient":0,"grid":{},"gridPos":{"h":7,"w":12,"x":0,"y":21},"hiddenSeries":false,"id":18,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","options":{"alertThreshold":true},"percentage":false,"pluginVersion":"7.2.0","pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"udp:50%","yaxis":1},{"alias":"tcp:50%","yaxis":2},{"alias":"tcp:90%","yaxis":2},{"alias":"tcp:99%","yaxis":2}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(coredns_dns_response_size_bytes_bucket{job=~\"$job\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto)) ","interval":"","intervalFactor":2,"legendFormat":"{{ proto }}:99%","refId":"A","step":40},{"expr":"histogram_quantile(0.90, sum(rate(coredns_dns_response_size_bytes_bucket{job=~\"$job\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto)) ","interval":"","intervalFactor":2,"legendFormat":"{{ proto }}:90%","refId":"B","step":40},{"expr":"histogram_quantile(0.50, sum(rate(coredns_dns_response_size_bytes_bucket{job=~\"$job\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto)) ","hide":false,"intervalFactor":2,"legendFormat":"{{ proto }}:50%","metric":"","refId":"C","step":40}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Responses (size, udp)","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","logBase":1,"max":null,"min":0,"show":true},{"format":"short","logBase":1,"max":null,"min":0,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","editable":true,"error":false,"fieldConfig":{"defaults":{"custom":{},"links":[]},"overrides":[]},"fill":1,"fillGradient":0,"grid":{},"gridPos":{"h":7,"w":12,"x":12,"y":21},"hiddenSeries":false,"id":20,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","options":{"alertThreshold":true},"percentage":false,"pluginVersion":"7.2.0","pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"udp:50%","yaxis":1},{"alias":"tcp:50%","yaxis":1},{"alias":"tcp:90%","yaxis":1},{"alias":"tcp:99%","yaxis":1}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(coredns_dns_response_size_bytes_bucket{job=~\"$job\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto)) ","format":"time_series","intervalFactor":2,"legendFormat":"{{ proto }}:99%","refId":"A","step":40},{"expr":"histogram_quantile(0.90, sum(rate(coredns_dns_response_size_bytes_bucket{job=~\"$job\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto)) ","format":"time_series","intervalFactor":2,"legendFormat":"{{ proto }}:90%","refId":"B","step":40},{"expr":"histogram_quantile(0.50, sum(rate(coredns_dns_response_size_bytes_bucket{job=~\"$job\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le, proto)) ","format":"time_series","intervalFactor":2,"legendFormat":"{{ proto }}:50%","metric":"","refId":"C","step":40}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Responses (size, tcp)","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","logBase":1,"max":null,"min":0,"show":true},{"format":"short","logBase":1,"max":null,"min":0,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","editable":true,"error":false,"fieldConfig":{"defaults":{"custom":{},"links":[]},"overrides":[]},"fill":1,"fillGradient":0,"grid":{},"gridPos":{"h":7,"w":12,"x":0,"y":28},"hiddenSeries":false,"id":22,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","options":{"alertThreshold":true},"percentage":false,"pluginVersion":"7.2.0","pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(coredns_cache_size{job=~\"$job\",instance=~\"$instance\"}) by (type) or\nsum(coredns_cache_entries{job=~\"$job\",instance=~\"$instance\"}) by (type)","interval":"","intervalFactor":2,"legendFormat":"{{ type }}","refId":"A","step":40}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Cache (size)","tooltip":{"shared":true,"sort":2,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"decbytes","logBase":1,"max":null,"min":0,"show":true},{"format":"short","logBase":1,"max":null,"min":0,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","editable":true,"error":false,"fieldConfig":{"defaults":{"custom":{},"links":[]},"overrides":[]},"fill":1,"fillGradient":0,"grid":{},"gridPos":{"h":7,"w":12,"x":12,"y":28},"hiddenSeries":false,"id":24,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","options":{"alertThreshold":true},"percentage":false,"pluginVersion":"7.2.0","pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"misses","yaxis":2}],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(rate(coredns_cache_hits_total{job=~\"$job\",instance=~\"$instance\"}[5m])) by (type)","hide":false,"intervalFactor":2,"legendFormat":"hits:{{ type }}","refId":"A","step":40},{"expr":"sum(rate(coredns_cache_misses_total{job=~\"$job\",instance=~\"$instance\"}[5m])) by (type)","hide":false,"intervalFactor":2,"legendFormat":"misses","refId":"B","step":40}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Cache (hitrate)","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","logBase":1,"max":null,"min":0,"show":true},{"format":"pps","logBase":1,"max":null,"min":0,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"refresh":"10s","schemaVersion":26,"style":"dark","tags":["dns","coredns"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"includeAll":false,"label":null,"multi":false,"name":"datasource","options":[],"query":"prometheus","queryValue":"","refresh":1,"regex":"","skipUrlSync":false,"type":"datasource"},{"allValue":".*","current":{"selected":false,"text":"coredns","value":"coredns"},"datasource":{"type":"prometheus","uid":"${datasource}"},"definition":"label_values(coredns_dns_requests_total, job)","hide":0,"includeAll":true,"label":"Job","multi":false,"name":"job","options":[],"query":{"query":"label_values(coredns_dns_requests_total, job)","refId":"StandardVariableQuery"},"refresh":1,"regex":"","skipUrlSync":false,"sort":1,"type":"query"},{"allValue":".*","current":{"selected":true,"text":"All","value":"$__all"},"datasource":"$datasource","definition":"label_values(coredns_dns_requests_total{job=~\"$job\"}, instance)","hide":0,"includeAll":true,"label":"Instance","multi":false,"name":"instance","options":[],"query":"label_values(coredns_dns_requests_total{job=~\"$job\"}, instance)","refresh":1,"regex":"","skipUrlSync":false,"sort":3,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-3h","to":"now"},"timepicker":{"refresh_intervals":["10s","30s","1m","5m","15m","30m","1h","2h","1d"]},"timezone":"Europe/Paris","title":"CoreDNS","uid":"vkQ0UHxik","version":3} \ No newline at end of file diff --git a/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-k8s-resources-cluster.yaml b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-k8s-resources-cluster.yaml new file mode 100644 index 0000000..6e1c3af --- /dev/null +++ b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-k8s-resources-cluster.yaml @@ -0,0 +1,22 @@ +# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-cluster.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: vynil-monitor + name: prometheus-community-kube-k8s-resources-cluster + annotations: + {} + labels: + grafana_dashboard: "1" + app: kube-prometheus-stack-grafana + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +data: + k8s-resources-cluster.json: |- + {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[],"refresh":"10s","rows":[{"collapse":false,"height":"100px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":1,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"cluster:node_cpu:ratio_rate5m{cluster=\"$cluster\"}","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"CPU Utilisation","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":2,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\",cluster=\"$cluster\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"CPU Requests Commitment","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":3,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\",cluster=\"$cluster\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"CPU Limits Commitment","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":4,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"1 - sum(:node_memory_MemAvailable_bytes:sum{cluster=\"$cluster\"}) / sum(node_memory_MemTotal_bytes{job=\"node-exporter\",cluster=\"$cluster\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"Memory Utilisation","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":5,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\",cluster=\"$cluster\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"Memory Requests Commitment","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":6,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\",cluster=\"$cluster\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"Memory Limits Commitment","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Headlines","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":7,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace)","format":"time_series","legendFormat":"{{namespace}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Usage","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":8,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"Pods","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":0,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down to pods","linkUrl":"/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=${__data.fields.namespace}","pattern":"Value #A","thresholds":[],"type":"number","unit":"short"},{"alias":"Workloads","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":0,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down to workloads","linkUrl":"/d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=${__data.fields.namespace}","pattern":"Value #B","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Usage","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"CPU Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #F","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #G","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Namespace","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down to pods","linkUrl":"/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=${__value.text}","pattern":"namespace","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(kube_pod_owner{job=\"kube-state-metrics\", cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"E"},{"expr":"sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"F"},{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"G"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Quota","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU Quota","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":9,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace)","format":"time_series","legendFormat":"{{namespace}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Usage (w/o cache)","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":10,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"Pods","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":0,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down to pods","linkUrl":"/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=${__data.fields.namespace}","pattern":"Value #A","thresholds":[],"type":"number","unit":"short"},{"alias":"Workloads","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":0,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down to workloads","linkUrl":"/d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=${__data.fields.namespace}","pattern":"Value #B","thresholds":[],"type":"number","unit":"short"},{"alias":"Memory Usage","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Memory Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #F","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #G","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Namespace","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down to pods","linkUrl":"/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=${__value.text}","pattern":"namespace","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(kube_pod_owner{job=\"kube-state-metrics\", cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"E"},{"expr":"sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"F"},{"expr":"sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"G"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Requests by Namespace","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory Requests","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":11,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"Current Receive Bandwidth","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Current Transmit Bandwidth","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Rate of Received Packets","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Transmitted Packets","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Received Packets Dropped","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Transmitted Packets Dropped","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #F","thresholds":[],"type":"number","unit":"pps"},{"alias":"Namespace","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down to pods","linkUrl":"/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=${__value.text}","pattern":"namespace","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"E"},{"expr":"sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"F"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Current Network Usage","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Current Network Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":12,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","format":"time_series","legendFormat":"{{namespace}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Receive Bandwidth","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":13,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","format":"time_series","legendFormat":"{{namespace}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Transmit Bandwidth","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Bandwidth","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":14,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"avg(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","format":"time_series","legendFormat":"{{namespace}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Average Container Bandwidth by Namespace: Received","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":15,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"avg(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","format":"time_series","legendFormat":"{{namespace}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Average Container Bandwidth by Namespace: Transmitted","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Average Container Bandwidth by Namespace","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":16,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","format":"time_series","legendFormat":"{{namespace}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":17,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","format":"time_series","legendFormat":"{{namespace}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Rate of Packets","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":18,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","format":"time_series","legendFormat":"{{namespace}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets Dropped","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":19,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","format":"time_series","legendFormat":"{{namespace}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets Dropped","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Rate of Packets Dropped","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","decimals":null,"fill":10,"id":20,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"ceil(sum by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval])))","format":"time_series","legendFormat":"{{namespace}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"IOPS(Reads+Writes)","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":21,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))","format":"time_series","legendFormat":"{{namespace}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"ThroughPut(Read+Write)","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Storage IO","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":22,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"sort":{"col":4,"desc":true},"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"IOPS(Reads)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":3,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"iops"},{"alias":"IOPS(Writes)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":3,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"iops"},{"alias":"IOPS(Reads + Writes)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":3,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"iops"},{"alias":"Throughput(Read)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Throughput(Write)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Throughput(Read + Write)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #F","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Namespace","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down to pods","linkUrl":"/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=${__value.text}","pattern":"namespace","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum by(namespace) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum by(namespace) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"E"},{"expr":"sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"F"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Current Storage IO","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Storage IO - Distribution","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"","value":""},"datasource":"$datasource","hide":2,"includeAll":false,"label":null,"multi":false,"name":"cluster","options":[],"query":"label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Kubernetes / Compute Resources / Cluster","uid":"efa86fd1d0c121a26444b636a3f509a8","version":0} \ No newline at end of file diff --git a/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-k8s-resources-multicluster.yaml b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-k8s-resources-multicluster.yaml new file mode 100644 index 0000000..3dc40de --- /dev/null +++ b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-k8s-resources-multicluster.yaml @@ -0,0 +1,22 @@ +# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-multicluster.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: vynil-monitor + name: prometheus-community-kube-k8s-resources-multicluster + annotations: + {} + labels: + grafana_dashboard: "1" + app: kube-prometheus-stack-grafana + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +data: + k8s-resources-multicluster.json: |- + {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[],"refresh":"10s","rows":[{"collapse":false,"height":"100px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":1,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"cluster:node_cpu:ratio_rate5m","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"CPU Utilisation","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":2,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", resource=\"cpu\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\", resource=\"cpu\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"CPU Requests Commitment","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":3,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", resource=\"cpu\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\", resource=\"cpu\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"CPU Limits Commitment","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":4,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"1 - sum(:node_memory_MemAvailable_bytes:sum) / sum(node_memory_MemTotal_bytes{job=\"node-exporter\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"Memory Utilisation","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":5,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", resource=\"memory\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\", resource=\"memory\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"Memory Requests Commitment","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":6,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", resource=\"memory\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\", resource=\"memory\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"Memory Limits Commitment","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Headlines","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":0,"id":7,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (cluster)","format":"time_series","legendFormat":"{{cluster}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Usage","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":8,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"CPU Usage","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"CPU Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Cluster","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"/d/efa86fd1d0c121a26444b636a3f509a8/k8s-resources-cluster?var-datasource=$datasource&var-cluster=${__value.text}","pattern":"cluster","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (cluster)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", resource=\"cpu\"}) by (cluster)","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (cluster) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", resource=\"cpu\"}) by (cluster)","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", resource=\"cpu\"}) by (cluster)","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (cluster) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", resource=\"cpu\"}) by (cluster)","format":"table","instant":true,"legendFormat":"","refId":"E"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Quota","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU Quota","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":0,"id":9,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\"}) by (cluster)","format":"time_series","legendFormat":"{{cluster}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Usage (w/o cache)","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":10,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"Memory Usage","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Memory Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Cluster","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"/d/efa86fd1d0c121a26444b636a3f509a8/k8s-resources-cluster?var-datasource=$datasource&var-cluster=${__value.text}","pattern":"cluster","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\"}) by (cluster)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", resource=\"memory\"}) by (cluster)","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\"}) by (cluster) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", resource=\"memory\"}) by (cluster)","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", resource=\"memory\"}) by (cluster)","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\"}) by (cluster) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", resource=\"memory\"}) by (cluster)","format":"table","instant":true,"legendFormat":"","refId":"E"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Requests by Cluster","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory Requests","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Kubernetes / Compute Resources / Multi-Cluster","uid":"b59e6c9f2fcbe2e16d77fc492374cc4f","version":0} \ No newline at end of file diff --git a/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-k8s-resources-namespace.yaml b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-k8s-resources-namespace.yaml new file mode 100644 index 0000000..900d8af --- /dev/null +++ b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-k8s-resources-namespace.yaml @@ -0,0 +1,22 @@ +# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-namespace.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: vynil-monitor + name: prometheus-community-kube-k8s-resources-namespace + annotations: + {} + labels: + grafana_dashboard: "1" + app: kube-prometheus-stack-grafana + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +data: + k8s-resources-namespace.json: |- + {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[],"refresh":"10s","rows":[{"collapse":false,"height":"100px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":1,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":3,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"CPU Utilisation (from requests)","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":2,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":3,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"CPU Utilisation (from limits)","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":3,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":3,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"Memory Utilisation (from requests)","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":4,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":3,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"Memory Utilisation (from limits)","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Headlines","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":5,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"quota - requests","color":"#F2495C","dashes":true,"fill":0,"hiddenSeries":true,"hideTooltip":true,"legend":true,"linewidth":2,"stack":false},{"alias":"quota - limits","color":"#FF9830","dashes":true,"fill":0,"hiddenSeries":true,"hideTooltip":true,"legend":true,"linewidth":2,"stack":false}],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"time_series","legendFormat":"{{pod}}","legendLink":null},{"expr":"scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"})","format":"time_series","legendFormat":"quota - requests","legendLink":null},{"expr":"scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"})","format":"time_series","legendFormat":"quota - limits","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Usage","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":6,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"CPU Usage","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"CPU Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Pod","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__value.text}","pattern":"pod","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"E"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Quota","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU Quota","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":7,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"quota - requests","color":"#F2495C","dashes":true,"fill":0,"hiddenSeries":true,"hideTooltip":true,"legend":true,"linewidth":2,"stack":false},{"alias":"quota - limits","color":"#FF9830","dashes":true,"fill":0,"hiddenSeries":true,"hideTooltip":true,"legend":true,"linewidth":2,"stack":false}],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}) by (pod)","format":"time_series","legendFormat":"{{pod}}","legendLink":null},{"expr":"scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"})","format":"time_series","legendFormat":"quota - requests","legendLink":null},{"expr":"scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"})","format":"time_series","legendFormat":"quota - limits","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Usage (w/o cache)","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":8,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"Memory Usage","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Memory Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Memory Usage (RSS)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #F","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Usage (Cache)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #G","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Usage (Swap)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #H","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Pod","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__value.text}","pattern":"pod","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"E"},{"expr":"sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"F"},{"expr":"sum(container_memory_cache{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"G"},{"expr":"sum(container_memory_swap{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"H"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Quota","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory Quota","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":9,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"Current Receive Bandwidth","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Current Transmit Bandwidth","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Rate of Received Packets","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Transmitted Packets","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Received Packets Dropped","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Transmitted Packets Dropped","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #F","thresholds":[],"type":"number","unit":"pps"},{"alias":"Pod","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down to pods","linkUrl":"/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__value.text}","pattern":"pod","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"E"},{"expr":"sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"F"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Current Network Usage","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Current Network Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":10,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Receive Bandwidth","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":11,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Transmit Bandwidth","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Bandwidth","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":12,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":13,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Rate of Packets","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":14,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets Dropped","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":15,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets Dropped","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Rate of Packets Dropped","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","decimals":null,"fill":10,"id":16,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"IOPS(Reads+Writes)","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":17,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"ThroughPut(Read+Write)","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Storage IO","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":18,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"sort":{"col":4,"desc":true},"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"IOPS(Reads)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":3,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"iops"},{"alias":"IOPS(Writes)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":3,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"iops"},{"alias":"IOPS(Reads + Writes)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":3,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"iops"},{"alias":"Throughput(Read)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Throughput(Write)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Throughput(Read + Write)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #F","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Pod","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down to pods","linkUrl":"/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__value.text}","pattern":"pod","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum by(pod) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum by(pod) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum by(pod) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum by(pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum by(pod) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"E"},{"expr":"sum by(pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"F"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Current Storage IO","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Storage IO - Distribution","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"","value":""},"datasource":"$datasource","hide":2,"includeAll":false,"label":null,"multi":false,"name":"cluster","options":[],"query":"label_values(up{job=\"kube-state-metrics\"}, cluster)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"","value":""},"datasource":"$datasource","hide":0,"includeAll":false,"label":null,"multi":false,"name":"namespace","options":[],"query":"label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Kubernetes / Compute Resources / Namespace (Pods)","uid":"85a562078cdf77779eaa1add43ccec1e","version":0} \ No newline at end of file diff --git a/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-k8s-resources-node.yaml b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-k8s-resources-node.yaml new file mode 100644 index 0000000..352ef4f --- /dev/null +++ b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-k8s-resources-node.yaml @@ -0,0 +1,22 @@ +# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-node.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: vynil-monitor + name: prometheus-community-kube-k8s-resources-node + annotations: + {} + labels: + grafana_dashboard: "1" + app: kube-prometheus-stack-grafana + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +data: + k8s-resources-node.json: |- + {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[],"refresh":"10s","rows":[{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":1,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"max capacity","color":"#F2495C","dashes":true,"fill":0,"hiddenSeries":true,"hideTooltip":true,"legend":true,"linewidth":2,"stack":false}],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(kube_node_status_capacity{cluster=\"$cluster\", node=~\"$node\", resource=\"cpu\"})","format":"time_series","legendFormat":"max capacity","legendLink":null},{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Usage","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":2,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"CPU Usage","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"CPU Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Pod","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"pod","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"E"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Quota","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU Quota","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":3,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"max capacity","color":"#F2495C","dashes":true,"fill":0,"hiddenSeries":true,"hideTooltip":true,"legend":true,"linewidth":2,"stack":false}],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(kube_node_status_capacity{cluster=\"$cluster\", node=~\"$node\", resource=\"memory\"})","format":"time_series","legendFormat":"max capacity","legendLink":null},{"expr":"sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\", container!=\"\"}) by (pod)","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Usage (w/o cache)","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":4,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"Memory Usage","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Memory Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Memory Usage (RSS)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #F","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Usage (Cache)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #G","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Usage (Swap)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #H","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Pod","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"pod","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"E"},{"expr":"sum(node_namespace_pod_container:container_memory_rss{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"F"},{"expr":"sum(node_namespace_pod_container:container_memory_cache{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"G"},{"expr":"sum(node_namespace_pod_container:container_memory_swap{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"H"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Quota","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory Quota","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"","value":""},"datasource":"$datasource","hide":2,"includeAll":false,"label":null,"multi":false,"name":"cluster","options":[],"query":"label_values(up{job=\"kube-state-metrics\"}, cluster)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"","value":""},"datasource":"$datasource","hide":0,"includeAll":false,"label":null,"multi":true,"name":"node","options":[],"query":"label_values(kube_node_info{cluster=\"$cluster\"}, node)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Kubernetes / Compute Resources / Node (Pods)","uid":"200ac8fdbfbb74b39aff88118e4d1c2c","version":0} \ No newline at end of file diff --git a/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-k8s-resources-pod.yaml b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-k8s-resources-pod.yaml new file mode 100644 index 0000000..80da287 --- /dev/null +++ b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-k8s-resources-pod.yaml @@ -0,0 +1,22 @@ +# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-pod.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: vynil-monitor + name: prometheus-community-kube-k8s-resources-pod + annotations: + {} + labels: + grafana_dashboard: "1" + app: kube-prometheus-stack-grafana + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +data: + k8s-resources-pod.json: |- + {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[],"refresh":"10s","rows":[{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":1,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"requests","color":"#F2495C","fill":0,"hideTooltip":true,"legend":true,"linewidth":2,"stack":false},{"alias":"limits","color":"#FF9830","fill":0,"hideTooltip":true,"legend":true,"linewidth":2,"stack":false}],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}) by (container)","format":"time_series","legendFormat":"{{container}}","legendLink":null},{"expr":"sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n","format":"time_series","legendFormat":"requests","legendLink":null},{"expr":"sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n","format":"time_series","legendFormat":"limits","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Usage","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":2,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":true,"max":true,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(increase(container_cpu_cfs_throttled_periods_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container) /sum(increase(container_cpu_cfs_periods_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container)","format":"time_series","legendFormat":"{{container}}","legendLink":null}],"thresholds":[{"colorMode":"critical","fill":true,"line":true,"op":"gt","value":0.25,"yaxis":"left"}],"timeFrom":null,"timeShift":null,"title":"CPU Throttling","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":1,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU Throttling","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":3,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"CPU Usage","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"CPU Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Container","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"container","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"E"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Quota","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU Quota","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":4,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"requests","color":"#F2495C","dashes":true,"fill":0,"hideTooltip":true,"legend":true,"linewidth":2,"stack":false},{"alias":"limits","color":"#FF9830","dashes":true,"fill":0,"hideTooltip":true,"legend":true,"linewidth":2,"stack":false}],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)","format":"time_series","legendFormat":"{{container}}","legendLink":null},{"expr":"sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n","format":"time_series","legendFormat":"requests","legendLink":null},{"expr":"sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n","format":"time_series","legendFormat":"limits","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Usage (WSS)","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":5,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"Memory Usage (WSS)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Memory Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Memory Usage (RSS)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #F","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Usage (Cache)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #G","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Usage (Swap)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #H","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Container","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"container","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"E"},{"expr":"sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"F"},{"expr":"sum(container_memory_cache{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"G"},{"expr":"sum(container_memory_swap{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"H"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Quota","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory Quota","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":6,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Receive Bandwidth","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":7,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Transmit Bandwidth","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Bandwidth","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":8,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":9,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Rate of Packets","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":10,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets Dropped","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":11,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets Dropped","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Rate of Packets Dropped","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","decimals":null,"fill":10,"id":12,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"ceil(sum by(pod) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))","format":"time_series","legendFormat":"Reads","legendLink":null},{"expr":"ceil(sum by(pod) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))","format":"time_series","legendFormat":"Writes","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"IOPS","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":13,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by(pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))","format":"time_series","legendFormat":"Reads","legendLink":null},{"expr":"sum by(pod) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))","format":"time_series","legendFormat":"Writes","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"ThroughPut","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Storage IO - Distribution(Pod - Read & Writes)","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","decimals":null,"fill":10,"id":14,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"ceil(sum by(container) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])))","format":"time_series","legendFormat":"{{container}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"IOPS(Reads+Writes)","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":15,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by(container) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","format":"time_series","legendFormat":"{{container}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"ThroughPut(Read+Write)","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Storage IO - Distribution(Containers)","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":16,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"sort":{"col":4,"desc":true},"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"IOPS(Reads)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":3,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"iops"},{"alias":"IOPS(Writes)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":3,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"iops"},{"alias":"IOPS(Reads + Writes)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":3,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"iops"},{"alias":"Throughput(Read)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Throughput(Write)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Throughput(Read + Write)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #F","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Container","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"container","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum by(container) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum by(container) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\",device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum by(container) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum by(container) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum by(container) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"E"},{"expr":"sum by(container) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"F"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Current Storage IO","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Storage IO - Distribution","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"","value":""},"datasource":"$datasource","hide":2,"includeAll":false,"label":null,"multi":false,"name":"cluster","options":[],"query":"label_values(up{job=\"kube-state-metrics\"}, cluster)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"","value":""},"datasource":"$datasource","hide":0,"includeAll":false,"label":null,"multi":false,"name":"namespace","options":[],"query":"label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"","value":""},"datasource":"$datasource","hide":0,"includeAll":false,"label":null,"multi":false,"name":"pod","options":[],"query":"label_values(kube_pod_info{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\"}, pod)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Kubernetes / Compute Resources / Pod","uid":"6581e46e4e5c7ba40a07646395ef7b23","version":0} \ No newline at end of file diff --git a/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-k8s-resources-workload.yaml b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-k8s-resources-workload.yaml new file mode 100644 index 0000000..14e3237 --- /dev/null +++ b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-k8s-resources-workload.yaml @@ -0,0 +1,22 @@ +# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-workload.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: vynil-monitor + name: prometheus-community-kube-k8s-resources-workload + annotations: + {} + labels: + grafana_dashboard: "1" + app: kube-prometheus-stack-grafana + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +data: + k8s-resources-workload.json: |- + {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[],"refresh":"10s","rows":[{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":1,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Usage","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":2,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"CPU Usage","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"CPU Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Pod","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__value.text}","pattern":"pod","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","format":"table","instant":true,"legendFormat":"","refId":"E"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Quota","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU Quota","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":3,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Usage","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":4,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"Memory Usage","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Memory Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Pod","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__value.text}","pattern":"pod","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","format":"table","instant":true,"legendFormat":"","refId":"E"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Quota","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory Quota","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":5,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"Current Receive Bandwidth","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Current Transmit Bandwidth","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Rate of Received Packets","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Transmitted Packets","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Received Packets Dropped","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Transmitted Packets Dropped","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #F","thresholds":[],"type":"number","unit":"pps"},{"alias":"Pod","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__value.text}","pattern":"pod","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"(sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"table","instant":true,"legendFormat":"","refId":"E"},{"expr":"(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"table","instant":true,"legendFormat":"","refId":"F"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Current Network Usage","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Current Network Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":6,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Receive Bandwidth","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":7,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Transmit Bandwidth","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Bandwidth","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":8,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(avg(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Average Container Bandwidth by Pod: Received","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":9,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(avg(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Average Container Bandwidth by Pod: Transmitted","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Average Container Bandwidth by Pod","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":10,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":11,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Rate of Packets","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":12,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets Dropped","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":13,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets Dropped","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Rate of Packets Dropped","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"","value":""},"datasource":"$datasource","hide":2,"includeAll":false,"label":null,"multi":false,"name":"cluster","options":[],"query":"label_values(up{job=\"kube-state-metrics\"}, cluster)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"","value":""},"datasource":"$datasource","hide":0,"includeAll":false,"label":null,"multi":false,"name":"namespace","options":[],"query":"label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"","value":""},"datasource":"$datasource","hide":0,"includeAll":true,"label":null,"multi":false,"name":"type","options":[],"query":"label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\"}, workload_type)","refresh":2,"regex":"","sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"","value":""},"datasource":"$datasource","hide":0,"includeAll":false,"label":null,"multi":false,"name":"workload","options":[],"query":"label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}, workload)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Kubernetes / Compute Resources / Workload","uid":"a164a7f0339f99e89cea5cb47e9be617","version":0} \ No newline at end of file diff --git a/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-k8s-resources-workloads-namespace.yaml b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-k8s-resources-workloads-namespace.yaml new file mode 100644 index 0000000..bef64c3 --- /dev/null +++ b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-k8s-resources-workloads-namespace.yaml @@ -0,0 +1,22 @@ +# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-workloads-namespace.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: vynil-monitor + name: prometheus-community-kube-k8s-resources-workloads-namespace + annotations: + {} + labels: + grafana_dashboard: "1" + app: kube-prometheus-stack-grafana + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +data: + k8s-resources-workloads-namespace.json: |- + {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[],"refresh":"10s","rows":[{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":1,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"quota - requests","color":"#F2495C","dashes":true,"fill":0,"hiddenSeries":true,"hideTooltip":true,"legend":true,"linewidth":2,"stack":false},{"alias":"quota - limits","color":"#FF9830","dashes":true,"fill":0,"hiddenSeries":true,"hideTooltip":true,"legend":true,"linewidth":2,"stack":false}],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"time_series","legendFormat":"{{workload}} - {{workload_type}}","legendLink":null},{"expr":"scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"})","format":"time_series","legendFormat":"quota - requests","legendLink":null},{"expr":"scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"})","format":"time_series","legendFormat":"quota - limits","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Usage","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":2,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"Running Pods","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":0,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Usage","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"CPU Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #F","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Workload","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=${__value.text}&var-type=${__data.fields.workload_type}","pattern":"workload","thresholds":[],"type":"number","unit":"short"},{"alias":"Workload Type","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"workload_type","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"count(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload, workload_type)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true,"legendFormat":"","refId":"E"},{"expr":"sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true,"legendFormat":"","refId":"F"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Quota","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU Quota","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":3,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"quota - requests","color":"#F2495C","dashes":true,"fill":0,"hiddenSeries":true,"hideTooltip":true,"legend":true,"linewidth":2,"stack":false},{"alias":"quota - limits","color":"#FF9830","dashes":true,"fill":0,"hiddenSeries":true,"hideTooltip":true,"legend":true,"linewidth":2,"stack":false}],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(\n container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"time_series","legendFormat":"{{workload}} - {{workload_type}}","legendLink":null},{"expr":"scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"})","format":"time_series","legendFormat":"quota - requests","legendLink":null},{"expr":"scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"})","format":"time_series","legendFormat":"quota - limits","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Usage","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":4,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"Running Pods","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":0,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"short"},{"alias":"Memory Usage","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Memory Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #F","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Workload","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=${__value.text}&var-type=${__data.fields.workload_type}","pattern":"workload","thresholds":[],"type":"number","unit":"short"},{"alias":"Workload Type","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"workload_type","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"count(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload, workload_type)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(\n container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(\n container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true,"legendFormat":"","refId":"E"},{"expr":"sum(\n container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true,"legendFormat":"","refId":"F"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Quota","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory Quota","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":5,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"Current Receive Bandwidth","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Current Transmit Bandwidth","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Rate of Received Packets","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Transmitted Packets","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Received Packets Dropped","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Transmitted Packets Dropped","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #F","thresholds":[],"type":"number","unit":"pps"},{"alias":"Workload","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down to pods","linkUrl":"/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=${__value.text}&var-type=$type","pattern":"workload","thresholds":[],"type":"number","unit":"short"},{"alias":"Workload Type","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"workload_type","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"(sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true,"legendFormat":"","refId":"E"},{"expr":"(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true,"legendFormat":"","refId":"F"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Current Network Usage","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Current Network Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":6,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","legendFormat":"{{workload}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Receive Bandwidth","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":7,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","legendFormat":"{{workload}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Transmit Bandwidth","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Bandwidth","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":8,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(avg(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","legendFormat":"{{workload}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Average Container Bandwidth by Workload: Received","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":9,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(avg(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","legendFormat":"{{workload}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Average Container Bandwidth by Workload: Transmitted","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Average Container Bandwidth by Workload","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":10,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","legendFormat":"{{workload}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":11,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","legendFormat":"{{workload}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Rate of Packets","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":12,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","legendFormat":"{{workload}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets Dropped","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":13,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","legendFormat":"{{workload}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets Dropped","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Rate of Packets Dropped","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"","value":""},"datasource":"$datasource","hide":2,"includeAll":false,"label":null,"multi":false,"name":"cluster","options":[],"query":"label_values(up{job=\"kube-state-metrics\"}, cluster)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"","value":""},"datasource":"$datasource","hide":0,"includeAll":false,"label":null,"multi":false,"name":"namespace","options":[],"query":"label_values(kube_pod_info{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"","value":""},"datasource":"$datasource","definition":"label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\"}, workload_type)","hide":0,"includeAll":true,"label":null,"multi":false,"name":"type","options":[],"query":"label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\"}, workload_type)","refresh":2,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Kubernetes / Compute Resources / Namespace (Workloads)","uid":"a87fb0d919ec0ea5f6543124e16c42a5","version":0} \ No newline at end of file diff --git a/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-kubelet.yaml b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-kubelet.yaml new file mode 100644 index 0000000..6dffa55 --- /dev/null +++ b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-kubelet.yaml @@ -0,0 +1,22 @@ +# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/kubelet.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: vynil-monitor + name: prometheus-community-kube-kubelet + annotations: + {} + labels: + grafana_dashboard: "1" + app: kube-prometheus-stack-grafana + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +data: + kubelet.json: |- + {"__inputs":[],"__requires":[],"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"id":null,"links":[],"panels":[{"datasource":"$datasource","fieldConfig":{"defaults":{"links":[],"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"none"}},"gridPos":{"h":7,"w":4,"x":0,"y":0},"id":2,"links":[],"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7","targets":[{"expr":"sum(kubelet_node_name{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"})","format":"time_series","intervalFactor":2,"legendFormat":"","refId":"A"}],"title":"Running Kubelets","transparent":false,"type":"stat"},{"datasource":"$datasource","fieldConfig":{"defaults":{"links":[],"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"none"}},"gridPos":{"h":7,"w":4,"x":4,"y":0},"id":3,"links":[],"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7","targets":[{"expr":"sum(kubelet_running_pods{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"title":"Running Pods","transparent":false,"type":"stat"},{"datasource":"$datasource","fieldConfig":{"defaults":{"links":[],"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"none"}},"gridPos":{"h":7,"w":4,"x":8,"y":0},"id":4,"links":[],"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7","targets":[{"expr":"sum(kubelet_running_containers{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"title":"Running Containers","transparent":false,"type":"stat"},{"datasource":"$datasource","fieldConfig":{"defaults":{"links":[],"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"none"}},"gridPos":{"h":7,"w":4,"x":12,"y":0},"id":5,"links":[],"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7","targets":[{"expr":"sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\", state=\"actual_state_of_world\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"title":"Actual Volume Count","transparent":false,"type":"stat"},{"datasource":"$datasource","fieldConfig":{"defaults":{"links":[],"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"none"}},"gridPos":{"h":7,"w":4,"x":16,"y":0},"id":6,"links":[],"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7","targets":[{"expr":"sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",state=\"desired_state_of_world\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"title":"Desired Volume Count","transparent":false,"type":"stat"},{"datasource":"$datasource","fieldConfig":{"defaults":{"links":[],"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"none"}},"gridPos":{"h":7,"w":4,"x":20,"y":0},"id":7,"links":[],"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7","targets":[{"expr":"sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"title":"Config Error Count","transparent":false,"type":"stat"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":12,"x":0,"y":7},"id":8,"legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (operation_type, instance)","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} {{operation_type}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Operation Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":12,"x":12,"y":7},"id":9,"legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type)","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} {{operation_type}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Operation Error Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":24,"x":0,"y":14},"id":10,"legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type, le))","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} {{operation_type}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Operation duration 99th quantile","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":12,"x":0,"y":21},"id":11,"legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance)","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} pod","refId":"A"},{"expr":"sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance)","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} worker","refId":"B"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Pod Start Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":12,"x":12,"y":21},"id":12,"legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} pod","refId":"A"},{"expr":"histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} worker","refId":"B"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Pod Start Duration","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":12,"x":0,"y":28},"id":13,"legend":{"alignAsTable":true,"avg":false,"current":true,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin)","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} {{operation_name}} {{volume_plugin}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Storage Operation Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":12,"x":12,"y":28},"id":14,"legend":{"alignAsTable":true,"avg":false,"current":true,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin)","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} {{operation_name}} {{volume_plugin}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Storage Operation Error Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":24,"x":0,"y":35},"id":15,"legend":{"alignAsTable":true,"avg":false,"current":true,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin, le))","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} {{operation_name}} {{volume_plugin}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Storage Operation Duration 99th quantile","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":12,"x":0,"y":42},"id":16,"legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type)","format":"time_series","intervalFactor":2,"legendFormat":"{{operation_type}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Cgroup manager operation rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":12,"x":12,"y":42},"id":17,"legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type, le))","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} {{operation_type}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Cgroup manager 99th quantile","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","description":"Pod lifecycle event generator","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":12,"x":0,"y":49},"id":18,"legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance)","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"PLEG relist rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":12,"x":12,"y":49},"id":19,"legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"PLEG relist interval","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":24,"x":0,"y":56},"id":20,"legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"PLEG relist duration","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":24,"x":0,"y":63},"id":21,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"2xx","refId":"A"},{"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"3xx","refId":"B"},{"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"4xx","refId":"C"},{"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"5xx","refId":"D"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"RPC Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":24,"x":0,"y":70},"id":22,"legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance, verb, url, le))","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} {{verb}} {{url}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Request duration 99th quantile","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":8,"x":0,"y":77},"id":23,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"process_resident_memory_bytes{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":8,"x":8,"y":77},"id":24,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU usage","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":8,"x":16,"y":77},"id":25,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"go_goroutines{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Goroutines","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}]}],"refresh":"10s","rows":[],"schemaVersion":14,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{},"datasource":"$datasource","hide":2,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(up{job=\"kubelet\", metrics_path=\"/metrics\"}, cluster)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{},"datasource":"$datasource","hide":0,"includeAll":true,"label":"instance","multi":false,"name":"instance","options":[],"query":"label_values(up{job=\"kubelet\", metrics_path=\"/metrics\",cluster=\"$cluster\"}, instance)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Kubernetes / Kubelet","uid":"3138fa155d5915769fbded898ac09fd9","version":0} \ No newline at end of file diff --git a/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-namespace-by-pod.yaml b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-namespace-by-pod.yaml new file mode 100644 index 0000000..5372e68 --- /dev/null +++ b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-namespace-by-pod.yaml @@ -0,0 +1,22 @@ +# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/namespace-by-pod.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: vynil-monitor + name: prometheus-community-kube-namespace-by-pod + annotations: + {} + labels: + grafana_dashboard: "1" + app: kube-prometheus-stack-grafana + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +data: + namespace-by-pod.json: |- + {"__inputs":[],"__requires":[],"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"id":null,"links":[],"panels":[{"collapse":false,"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":2,"panels":[],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Current Bandwidth","titleSize":"h6","type":"row"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"$datasource","decimals":0,"format":"time_series","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":9,"w":12,"x":0,"y":1},"height":9,"id":3,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"minSpan":12,"nullPointMode":"connected","nullText":null,"options":{"fieldOptions":{"calcs":["last"],"defaults":{"max":10000000000,"min":0,"title":"$namespace","unit":"Bps"},"mappings":[],"override":{},"thresholds":[{"color":"dark-green","index":0,"value":null},{"color":"dark-yellow","index":1,"value":5000000000},{"color":"dark-red","index":2,"value":7000000000}],"values":false}},"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":12,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution]))","format":"time_series","instant":null,"intervalFactor":1,"legendFormat":"","refId":"A"}],"thresholds":"","timeFrom":null,"timeShift":null,"title":"Current Rate of Bytes Received","type":"gauge","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"$datasource","decimals":0,"format":"time_series","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":9,"w":12,"x":12,"y":1},"height":9,"id":4,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"minSpan":12,"nullPointMode":"connected","nullText":null,"options":{"fieldOptions":{"calcs":["last"],"defaults":{"max":10000000000,"min":0,"title":"$namespace","unit":"Bps"},"mappings":[],"override":{},"thresholds":[{"color":"dark-green","index":0,"value":null},{"color":"dark-yellow","index":1,"value":5000000000},{"color":"dark-red","index":2,"value":7000000000}],"values":false}},"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":12,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution]))","format":"time_series","instant":null,"intervalFactor":1,"legendFormat":"","refId":"A"}],"thresholds":"","timeFrom":null,"timeShift":null,"title":"Current Rate of Bytes Transmitted","type":"gauge","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"columns":[{"text":"Time","value":"Time"},{"text":"Value #A","value":"Value #A"},{"text":"Value #B","value":"Value #B"},{"text":"Value #C","value":"Value #C"},{"text":"Value #D","value":"Value #D"},{"text":"Value #E","value":"Value #E"},{"text":"Value #F","value":"Value #F"},{"text":"pod","value":"pod"}],"datasource":"$datasource","fill":1,"fontSize":"100%","gridPos":{"h":9,"w":24,"x":0,"y":10},"id":5,"lines":true,"linewidth":1,"links":[],"minSpan":24,"nullPointMode":"null as zero","renderer":"flot","scroll":true,"showHeader":true,"sort":{"col":0,"desc":false},"spaceLength":10,"span":24,"styles":[{"alias":"Time","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Time","thresholds":[],"type":"hidden","unit":"short"},{"alias":"Bandwidth Received","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Bandwidth Transmitted","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Rate of Received Packets","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Transmitted Packets","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Received Packets Dropped","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Transmitted Packets Dropped","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #F","thresholds":[],"type":"number","unit":"pps"},{"alias":"Pod","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTooltip":"Drill down","linkUrl":"d/7a18067ce943a40ae25454675c19ff5c/kubernetes-networking-pod?orgId=1&refresh=30s&var-namespace=$namespace&var-pod=${__value.text}","pattern":"pod","thresholds":[],"type":"number","unit":"short"}],"targets":[{"expr":"sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"A","step":10},{"expr":"sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"B","step":10},{"expr":"sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"C","step":10},{"expr":"sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"D","step":10},{"expr":"sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"E","step":10},{"expr":"sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"F","step":10}],"timeFrom":null,"timeShift":null,"title":"Current Status","type":"table"},{"collapse":false,"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":19},"id":6,"panels":[],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Bandwidth","titleSize":"h6","type":"row"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":0,"y":20},"id":7,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Receive Bandwidth","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":12,"y":20},"id":8,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Transmit Bandwidth","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"collapse":true,"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":29},"id":9,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":10,"w":12,"x":0,"y":30},"id":10,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":10,"w":12,"x":12,"y":30},"id":11,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Packets","titleSize":"h6","type":"row"},{"collapse":true,"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":30},"id":12,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":10,"w":12,"x":0,"y":40},"id":13,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets Dropped","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":10,"w":12,"x":12,"y":40},"id":14,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets Dropped","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Errors","titleSize":"h6","type":"row"}],"refresh":"10s","rows":[],"schemaVersion":18,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{},"datasource":"$datasource","hide":2,"includeAll":false,"label":null,"multi":false,"name":"cluster","options":[],"query":"label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)","refresh":2,"regex":"","sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":".+","auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"kube-system","value":"kube-system"},"datasource":"$datasource","definition":"label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)","hide":0,"includeAll":true,"label":null,"multi":false,"name":"namespace","options":[],"query":"label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)","refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"5m","value":"5m"},"datasource":"$datasource","hide":0,"includeAll":false,"label":null,"multi":false,"name":"resolution","options":[{"selected":false,"text":"30s","value":"30s"},{"selected":true,"text":"5m","value":"5m"},{"selected":false,"text":"1h","value":"1h"}],"query":"30s,5m,1h","refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"interval","useTags":false},{"allValue":null,"auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"5m","value":"5m"},"datasource":"$datasource","hide":2,"includeAll":false,"label":null,"multi":false,"name":"interval","options":[{"selected":true,"text":"4h","value":"4h"}],"query":"4h","refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"interval","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Kubernetes / Networking / Namespace (Pods)","uid":"8b7a8b326d7a6f1f04244066368c67af","version":0} \ No newline at end of file diff --git a/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-namespace-by-workload.yaml b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-namespace-by-workload.yaml new file mode 100644 index 0000000..15afeea --- /dev/null +++ b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-namespace-by-workload.yaml @@ -0,0 +1,22 @@ +# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/namespace-by-workload.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: vynil-monitor + name: prometheus-community-kube-namespace-by-workload + annotations: + {} + labels: + grafana_dashboard: "1" + app: kube-prometheus-stack-grafana + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +data: + namespace-by-workload.json: |- + {"__inputs":[],"__requires":[],"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"id":null,"links":[],"panels":[{"collapse":false,"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":2,"panels":[],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Current Bandwidth","titleSize":"h6","type":"row"},{"aliasColors":{},"bars":true,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":0,"y":1},"id":3,"legend":{"alignAsTable":true,"avg":false,"current":true,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":false,"linewidth":1,"links":[],"minSpan":24,"nullPointMode":"null","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":false,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{ workload }}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Current Rate of Bytes Received","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"series","name":null,"show":false,"values":["current"]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":true,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":12,"y":1},"id":4,"legend":{"alignAsTable":true,"avg":false,"current":true,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":false,"linewidth":1,"links":[],"minSpan":24,"nullPointMode":"null","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":false,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{ workload }}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Current Rate of Bytes Transmitted","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"series","name":null,"show":false,"values":["current"]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"columns":[{"text":"Time","value":"Time"},{"text":"Value #A","value":"Value #A"},{"text":"Value #B","value":"Value #B"},{"text":"Value #C","value":"Value #C"},{"text":"Value #D","value":"Value #D"},{"text":"Value #E","value":"Value #E"},{"text":"Value #F","value":"Value #F"},{"text":"Value #G","value":"Value #G"},{"text":"Value #H","value":"Value #H"},{"text":"workload","value":"workload"}],"datasource":"$datasource","fill":1,"fontSize":"90%","gridPos":{"h":9,"w":24,"x":0,"y":10},"id":5,"lines":true,"linewidth":1,"links":[],"minSpan":24,"nullPointMode":"null as zero","renderer":"flot","scroll":true,"showHeader":true,"sort":{"col":0,"desc":false},"spaceLength":10,"span":24,"styles":[{"alias":"Time","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Time","thresholds":[],"type":"hidden","unit":"short"},{"alias":"Current Bandwidth Received","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Current Bandwidth Transmitted","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Average Bandwidth Received","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Average Bandwidth Transmitted","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Rate of Received Packets","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Transmitted Packets","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #F","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Received Packets Dropped","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #G","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Transmitted Packets Dropped","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #H","thresholds":[],"type":"number","unit":"pps"},{"alias":"Workload","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTooltip":"Drill down","linkUrl":"d/728bf77cc1166d2f3133bf25846876cc/kubernetes-networking-workload?orgId=1&refresh=30s&var-namespace=$namespace&var-type=$type&var-workload=${__value.text}","pattern":"workload","thresholds":[],"type":"number","unit":"short"}],"targets":[{"expr":"sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"A","step":10},{"expr":"sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"B","step":10},{"expr":"sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"C","step":10},{"expr":"sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"D","step":10},{"expr":"sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"E","step":10},{"expr":"sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"F","step":10},{"expr":"sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"G","step":10},{"expr":"sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"H","step":10}],"timeFrom":null,"timeShift":null,"title":"Current Status","type":"table"},{"collapse":true,"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":19},"id":6,"panels":[{"aliasColors":{},"bars":true,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":0,"y":20},"id":7,"legend":{"alignAsTable":true,"avg":false,"current":true,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":false,"linewidth":1,"links":[],"minSpan":24,"nullPointMode":"null","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":false,"steppedLine":false,"targets":[{"expr":"sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{ workload }}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Average Rate of Bytes Received","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"series","name":null,"show":false,"values":["current"]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":true,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":12,"y":20},"id":8,"legend":{"alignAsTable":true,"avg":false,"current":true,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":false,"linewidth":1,"links":[],"minSpan":24,"nullPointMode":"null","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":false,"steppedLine":false,"targets":[{"expr":"sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{ workload }}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Average Rate of Bytes Transmitted","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"series","name":null,"show":false,"values":["current"]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Average Bandwidth","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":29},"id":9,"panels":[],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Bandwidth HIstory","titleSize":"h6","type":"row"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":0,"y":38},"id":10,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{workload}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Receive Bandwidth","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":12,"y":38},"id":11,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{workload}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Transmit Bandwidth","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"collapse":true,"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":39},"id":12,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":0,"y":40},"id":13,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{workload}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":12,"y":40},"id":14,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{workload}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Packets","titleSize":"h6","type":"row"},{"collapse":true,"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":40},"id":15,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":0,"y":41},"id":16,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{workload}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets Dropped","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":12,"y":41},"id":17,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{workload}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets Dropped","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Errors","titleSize":"h6","type":"row"}],"refresh":"10s","rows":[],"schemaVersion":18,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{},"datasource":"$datasource","hide":2,"includeAll":false,"label":null,"multi":false,"name":"cluster","options":[],"query":"label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)","refresh":2,"regex":"","sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"kube-system","value":"kube-system"},"datasource":"$datasource","definition":"label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)","hide":0,"includeAll":false,"label":null,"multi":false,"name":"namespace","options":[],"query":"label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)","refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"","value":""},"datasource":"$datasource","definition":"label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\"}, workload_type)","hide":0,"includeAll":true,"label":null,"multi":false,"name":"type","options":[],"query":"label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\"}, workload_type)","refresh":2,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"5m","value":"5m"},"datasource":"$datasource","hide":0,"includeAll":false,"label":null,"multi":false,"name":"resolution","options":[{"selected":false,"text":"30s","value":"30s"},{"selected":true,"text":"5m","value":"5m"},{"selected":false,"text":"1h","value":"1h"}],"query":"30s,5m,1h","refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"interval","useTags":false},{"allValue":null,"auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"5m","value":"5m"},"datasource":"$datasource","hide":2,"includeAll":false,"label":null,"multi":false,"name":"interval","options":[{"selected":true,"text":"4h","value":"4h"}],"query":"4h","refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"interval","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Kubernetes / Networking / Namespace (Workload)","uid":"bbb2a765a623ae38130206c7d94a160f","version":0} \ No newline at end of file diff --git a/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-persistentvolumesusage.yaml b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-persistentvolumesusage.yaml new file mode 100644 index 0000000..3ef479e --- /dev/null +++ b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-persistentvolumesusage.yaml @@ -0,0 +1,22 @@ +# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/persistentvolumesusage.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: vynil-monitor + name: prometheus-community-kube-persistentvolumesusage + annotations: + {} + labels: + grafana_dashboard: "1" + app: kube-prometheus-stack-grafana + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +data: + persistentvolumesusage.json: |- + {"__inputs":[],"__requires":[],"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"id":null,"links":[],"refresh":"10s","rows":[{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":2,"interval":"1m","legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":9,"stack":true,"steppedLine":false,"targets":[{"expr":"(\n sum without(instance, node) (topk(1, (kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n -\n sum without(instance, node) (topk(1, (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n)\n","format":"time_series","intervalFactor":1,"legendFormat":"Used Space","refId":"A"},{"expr":"sum without(instance, node) (topk(1, (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n","format":"time_series","intervalFactor":1,"legendFormat":"Free Space","refId":"B"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Volume Space Usage","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 0.9)"],"datasource":"$datasource","format":"percent","gauge":{"maxValue":100,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{},"id":3,"interval":"1m","legend":{"alignAsTable":true,"rightSide":true},"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":3,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"max without(instance,node) (\n(\n topk(1, kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n topk(1, kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n/\ntopk(1, kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n* 100)\n","format":"time_series","intervalFactor":2,"legendFormat":"","refId":"A"}],"thresholds":"80, 90","title":"Volume Space Usage","tooltip":{"shared":false},"type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Dashboard Row","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":4,"interval":"1m","legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":9,"stack":true,"steppedLine":false,"targets":[{"expr":"sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n","format":"time_series","intervalFactor":1,"legendFormat":"Used inodes","refId":"A"},{"expr":"(\n sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n -\n sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n)\n","format":"time_series","intervalFactor":1,"legendFormat":" Free inodes","refId":"B"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Volume inodes Usage","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"none","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"none","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 0.9)"],"datasource":"$datasource","format":"percent","gauge":{"maxValue":100,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{},"id":5,"interval":"1m","legend":{"alignAsTable":true,"rightSide":true},"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":3,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"max without(instance,node) (\ntopk(1, kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n/\ntopk(1, kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n* 100)\n","format":"time_series","intervalFactor":2,"legendFormat":"","refId":"A"}],"thresholds":"80, 90","title":"Volume inodes Usage","tooltip":{"shared":false},"type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Dashboard Row","titleSize":"h6","type":"row"}],"schemaVersion":14,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{},"datasource":"$datasource","hide":2,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}, cluster)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{},"datasource":"$datasource","hide":0,"includeAll":false,"label":"Namespace","multi":false,"name":"namespace","options":[],"query":"label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"}, namespace)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{},"datasource":"$datasource","hide":0,"includeAll":false,"label":"PersistentVolumeClaim","multi":false,"name":"volume","options":[],"query":"label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\"}, persistentvolumeclaim)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-7d","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Kubernetes / Persistent Volumes","uid":"919b92a8e8041bd567af9edab12c840c","version":0} \ No newline at end of file diff --git a/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-pod-total.yaml b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-pod-total.yaml new file mode 100644 index 0000000..a1b0e47 --- /dev/null +++ b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-pod-total.yaml @@ -0,0 +1,22 @@ +# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/pod-total.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: vynil-monitor + name: prometheus-community-kube-pod-total + annotations: + {} + labels: + grafana_dashboard: "1" + app: kube-prometheus-stack-grafana + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +data: + pod-total.json: |- + {"__inputs":[],"__requires":[],"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"id":null,"links":[],"panels":[{"collapse":false,"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":2,"panels":[],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Current Bandwidth","titleSize":"h6","type":"row"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"$datasource","decimals":0,"format":"time_series","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":9,"w":12,"x":0,"y":1},"height":9,"id":3,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"minSpan":12,"nullPointMode":"connected","nullText":null,"options":{"fieldOptions":{"calcs":["last"],"defaults":{"max":10000000000,"min":0,"title":"$namespace: $pod","unit":"Bps"},"mappings":[],"override":{},"thresholds":[{"color":"dark-green","index":0,"value":null},{"color":"dark-yellow","index":1,"value":5000000000},{"color":"dark-red","index":2,"value":7000000000}],"values":false}},"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":12,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution]))","format":"time_series","instant":null,"intervalFactor":1,"legendFormat":"","refId":"A"}],"thresholds":"","timeFrom":null,"timeShift":null,"title":"Current Rate of Bytes Received","type":"gauge","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"$datasource","decimals":0,"format":"time_series","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":9,"w":12,"x":12,"y":1},"height":9,"id":4,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"minSpan":12,"nullPointMode":"connected","nullText":null,"options":{"fieldOptions":{"calcs":["last"],"defaults":{"max":10000000000,"min":0,"title":"$namespace: $pod","unit":"Bps"},"mappings":[],"override":{},"thresholds":[{"color":"dark-green","index":0,"value":null},{"color":"dark-yellow","index":1,"value":5000000000},{"color":"dark-red","index":2,"value":7000000000}],"values":false}},"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":12,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution]))","format":"time_series","instant":null,"intervalFactor":1,"legendFormat":"","refId":"A"}],"thresholds":"","timeFrom":null,"timeShift":null,"title":"Current Rate of Bytes Transmitted","type":"gauge","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"collapse":false,"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":10},"id":5,"panels":[],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Bandwidth","titleSize":"h6","type":"row"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":0,"y":11},"id":6,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Receive Bandwidth","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":12,"y":11},"id":7,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Transmit Bandwidth","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"collapse":true,"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":20},"id":8,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":10,"w":12,"x":0,"y":21},"id":9,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":10,"w":12,"x":12,"y":21},"id":10,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Packets","titleSize":"h6","type":"row"},{"collapse":true,"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":21},"id":11,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":10,"w":12,"x":0,"y":32},"id":12,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets Dropped","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":10,"w":12,"x":12,"y":32},"id":13,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets Dropped","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Errors","titleSize":"h6","type":"row"}],"refresh":"10s","rows":[],"schemaVersion":18,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{},"datasource":"$datasource","hide":2,"includeAll":false,"label":null,"multi":false,"name":"cluster","options":[],"query":"label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)","refresh":2,"regex":"","sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":".+","auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"kube-system","value":"kube-system"},"datasource":"$datasource","definition":"label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)","hide":0,"includeAll":true,"label":null,"multi":false,"name":"namespace","options":[],"query":"label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)","refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":".+","auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"","value":""},"datasource":"$datasource","definition":"label_values(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}, pod)","hide":0,"includeAll":false,"label":null,"multi":false,"name":"pod","options":[],"query":"label_values(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}, pod)","refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"5m","value":"5m"},"datasource":"$datasource","hide":0,"includeAll":false,"label":null,"multi":false,"name":"resolution","options":[{"selected":false,"text":"30s","value":"30s"},{"selected":true,"text":"5m","value":"5m"},{"selected":false,"text":"1h","value":"1h"}],"query":"30s,5m,1h","refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"interval","useTags":false},{"allValue":null,"auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"5m","value":"5m"},"datasource":"$datasource","hide":2,"includeAll":false,"label":null,"multi":false,"name":"interval","options":[{"selected":true,"text":"4h","value":"4h"}],"query":"4h","refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"interval","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Kubernetes / Networking / Pod","uid":"7a18067ce943a40ae25454675c19ff5c","version":0} \ No newline at end of file diff --git a/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-prometheus.yaml b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-prometheus.yaml new file mode 100644 index 0000000..2c248fc --- /dev/null +++ b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-prometheus.yaml @@ -0,0 +1,22 @@ +# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/prometheus.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: vynil-monitor + name: prometheus-community-kube-prometheus + annotations: + {} + labels: + grafana_dashboard: "1" + app: kube-prometheus-stack-grafana + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +data: + prometheus.json: |- + {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[],"refresh":"60s","rows":[{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":1,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"Count","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"hidden","unit":"short"},{"alias":"Uptime","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"s"},{"alias":"Instance","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"instance","thresholds":[],"type":"number","unit":"short"},{"alias":"Job","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"job","thresholds":[],"type":"number","unit":"short"},{"alias":"Version","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"version","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"count by (job, instance, version) (prometheus_build_info{job=~\"$job\", instance=~\"$instance\"})","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"max by (job, instance) (time() - process_start_time_seconds{job=~\"$job\", instance=~\"$instance\"})","format":"table","instant":true,"legendFormat":"","refId":"B"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Prometheus Stats","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Prometheus Stats","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":2,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(prometheus_target_sync_length_seconds_sum{job=~\"$job\",instance=~\"$instance\"}[5m])) by (scrape_job) * 1e3","format":"time_series","legendFormat":"{{scrape_job}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Target Sync","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ms","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":3,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(prometheus_sd_discovered_targets{job=~\"$job\",instance=~\"$instance\"})","format":"time_series","legendFormat":"Targets","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Targets","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Discovery","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":4,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"rate(prometheus_target_interval_length_seconds_sum{job=~\"$job\",instance=~\"$instance\"}[5m]) / rate(prometheus_target_interval_length_seconds_count{job=~\"$job\",instance=~\"$instance\"}[5m]) * 1e3","format":"time_series","legendFormat":"{{interval}} configured","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Average Scrape Interval Duration","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ms","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":5,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by (job) (rate(prometheus_target_scrapes_exceeded_body_size_limit_total[1m]))","format":"time_series","legendFormat":"exceeded body size limit: {{job}}","legendLink":null},{"expr":"sum by (job) (rate(prometheus_target_scrapes_exceeded_sample_limit_total[1m]))","format":"time_series","legendFormat":"exceeded sample limit: {{job}}","legendLink":null},{"expr":"sum by (job) (rate(prometheus_target_scrapes_sample_duplicate_timestamp_total[1m]))","format":"time_series","legendFormat":"duplicate timestamp: {{job}}","legendLink":null},{"expr":"sum by (job) (rate(prometheus_target_scrapes_sample_out_of_bounds_total[1m]))","format":"time_series","legendFormat":"out of bounds: {{job}}","legendLink":null},{"expr":"sum by (job) (rate(prometheus_target_scrapes_sample_out_of_order_total[1m]))","format":"time_series","legendFormat":"out of order: {{job}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Scrape failures","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":6,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":true,"steppedLine":false,"targets":[{"expr":"rate(prometheus_tsdb_head_samples_appended_total{job=~\"$job\",instance=~\"$instance\"}[5m])","format":"time_series","legendFormat":"{{job}} {{instance}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Appended Samples","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Retrieval","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":7,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"prometheus_tsdb_head_series{job=~\"$job\",instance=~\"$instance\"}","format":"time_series","legendFormat":"{{job}} {{instance}} head series","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Head Series","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":8,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"prometheus_tsdb_head_chunks{job=~\"$job\",instance=~\"$instance\"}","format":"time_series","legendFormat":"{{job}} {{instance}} head chunks","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Head Chunks","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Storage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":9,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"rate(prometheus_engine_query_duration_seconds_count{job=~\"$job\",instance=~\"$instance\",slice=\"inner_eval\"}[5m])","format":"time_series","legendFormat":"{{job}} {{instance}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Query Rate","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":10,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"max by (slice) (prometheus_engine_query_duration_seconds{quantile=\"0.9\",job=~\"$job\",instance=~\"$instance\"}) * 1e3","format":"time_series","legendFormat":"{{slice}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Stage Duration","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ms","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Query","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["prometheus-mixin"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":".+","current":{"selected":true,"text":"All","value":"$__all"},"datasource":"$datasource","hide":0,"includeAll":true,"label":"job","multi":true,"name":"job","options":[],"query":"label_values(prometheus_build_info{job=\"prometheus-k8s\",namespace=\"monitoring\"}, job)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":".+","current":{"selected":true,"text":"All","value":"$__all"},"datasource":"$datasource","hide":0,"includeAll":true,"label":"instance","multi":true,"name":"instance","options":[],"query":"label_values(prometheus_build_info{job=~\"$job\"}, instance)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Prometheus / Overview","uid":"","version":0} \ No newline at end of file diff --git a/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-proxy.yaml b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-proxy.yaml new file mode 100644 index 0000000..c31d00b --- /dev/null +++ b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-proxy.yaml @@ -0,0 +1,22 @@ +# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/proxy.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: vynil-monitor + name: prometheus-community-kube-proxy + annotations: + {} + labels: + grafana_dashboard: "1" + app: kube-prometheus-stack-grafana + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +data: + proxy.json: |- + {"__inputs":[],"__requires":[],"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"id":null,"links":[],"refresh":"10s","rows":[{"collapse":false,"collapsed":false,"panels":[{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"$datasource","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{},"id":2,"interval":"1m","legend":{"alignAsTable":true,"rightSide":true},"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":2,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum(up{cluster=\"$cluster\", job=\"kube-proxy\"})","format":"time_series","intervalFactor":2,"legendFormat":"","refId":"A"}],"thresholds":"","title":"Up","tooltip":{"shared":false},"type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"min"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":3,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":5,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"rate","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rules Sync Rate","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"ops","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":4,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":5,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99,rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rule Sync Latency 99th Quantile","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"s","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Dashboard Row","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":5,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(kubeproxy_network_programming_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"rate","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Network Programming Rate","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"ops","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":6,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval])) by (instance, le))","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Network Programming Latency 99th Quantile","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"s","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Dashboard Row","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":7,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"2xx","refId":"A"},{"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"3xx","refId":"B"},{"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"4xx","refId":"C"},{"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"5xx","refId":"D"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Kube API Request Rate","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":8,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":8,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\",verb=\"POST\"}[$__rate_interval])) by (verb, url, le))","format":"time_series","intervalFactor":2,"legendFormat":"{{verb}} {{url}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Post Request Latency 99th Quantile","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"s","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Dashboard Row","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":9,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))","format":"time_series","intervalFactor":2,"legendFormat":"{{verb}} {{url}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Get Request Latency 99th Quantile","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"s","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Dashboard Row","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":10,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"process_resident_memory_bytes{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":11,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}[$__rate_interval])","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU usage","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":12,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"go_goroutines{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Goroutines","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Dashboard Row","titleSize":"h6","type":"row"}],"schemaVersion":14,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{},"datasource":"$datasource","hide":2,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(up{job=\"kube-proxy\"}, cluster)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{},"datasource":"$datasource","hide":0,"includeAll":true,"label":null,"multi":false,"name":"instance","options":[],"query":"label_values(up{job=\"kube-proxy\", cluster=\"$cluster\", job=\"kube-proxy\"}, instance)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Kubernetes / Proxy","uid":"632e265de029684c40b21cb76bca4f94","version":0} \ No newline at end of file diff --git a/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-workload-total.yaml b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-workload-total.yaml new file mode 100644 index 0000000..63d75da --- /dev/null +++ b/monitor/prometheus/v1_ConfigMap_prometheus-community-kube-workload-total.yaml @@ -0,0 +1,22 @@ +# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/workload-total.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: vynil-monitor + name: prometheus-community-kube-workload-total + annotations: + {} + labels: + grafana_dashboard: "1" + app: kube-prometheus-stack-grafana + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +data: + workload-total.json: |- + {"__inputs":[],"__requires":[],"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"id":null,"links":[],"panels":[{"collapse":false,"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":2,"panels":[],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Current Bandwidth","titleSize":"h6","type":"row"},{"aliasColors":{},"bars":true,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":0,"y":1},"id":3,"legend":{"alignAsTable":true,"avg":false,"current":true,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":false,"linewidth":1,"links":[],"minSpan":24,"nullPointMode":"null","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":false,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{ pod }}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Current Rate of Bytes Received","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"series","name":null,"show":false,"values":["current"]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":true,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":12,"y":1},"id":4,"legend":{"alignAsTable":true,"avg":false,"current":true,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":false,"linewidth":1,"links":[],"minSpan":24,"nullPointMode":"null","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":false,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{ pod }}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Current Rate of Bytes Transmitted","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"series","name":null,"show":false,"values":["current"]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"collapse":true,"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":10},"id":5,"panels":[{"aliasColors":{},"bars":true,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":0,"y":11},"id":6,"legend":{"alignAsTable":true,"avg":false,"current":true,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":false,"linewidth":1,"links":[],"minSpan":24,"nullPointMode":"null","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":false,"steppedLine":false,"targets":[{"expr":"sort_desc(avg(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{ pod }}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Average Rate of Bytes Received","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"series","name":null,"show":false,"values":["current"]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":true,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":12,"y":11},"id":7,"legend":{"alignAsTable":true,"avg":false,"current":true,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":false,"linewidth":1,"links":[],"minSpan":24,"nullPointMode":"null","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":false,"steppedLine":false,"targets":[{"expr":"sort_desc(avg(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{ pod }}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Average Rate of Bytes Transmitted","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"series","name":null,"show":false,"values":["current"]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Average Bandwidth","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":11},"id":8,"panels":[],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Bandwidth HIstory","titleSize":"h6","type":"row"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":0,"y":12},"id":9,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Receive Bandwidth","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":12,"y":12},"id":10,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Transmit Bandwidth","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"collapse":true,"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":21},"id":11,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":0,"y":22},"id":12,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":12,"y":22},"id":13,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Packets","titleSize":"h6","type":"row"},{"collapse":true,"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":22},"id":14,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":0,"y":23},"id":15,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets Dropped","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":12,"y":23},"id":16,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets Dropped","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Errors","titleSize":"h6","type":"row"}],"refresh":"10s","rows":[],"schemaVersion":18,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{},"datasource":"$datasource","hide":2,"includeAll":false,"label":null,"multi":false,"name":"cluster","options":[],"query":"label_values(kube_pod_info{job=\"kube-state-metrics\"}, cluster)","refresh":2,"regex":"","sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":".+","auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"kube-system","value":"kube-system"},"datasource":"$datasource","definition":"label_values(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\"}, namespace)","hide":0,"includeAll":true,"label":null,"multi":false,"name":"namespace","options":[],"query":"label_values(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\"}, namespace)","refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"","value":""},"datasource":"$datasource","definition":"label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\"}, workload)","hide":0,"includeAll":false,"label":null,"multi":false,"name":"workload","options":[],"query":"label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\"}, workload)","refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"","value":""},"datasource":"$datasource","definition":"label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\"}, workload_type)","hide":0,"includeAll":true,"label":null,"multi":false,"name":"type","options":[],"query":"label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\"}, workload_type)","refresh":2,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"5m","value":"5m"},"datasource":"$datasource","hide":0,"includeAll":false,"label":null,"multi":false,"name":"resolution","options":[{"selected":false,"text":"30s","value":"30s"},{"selected":true,"text":"5m","value":"5m"},{"selected":false,"text":"1h","value":"1h"}],"query":"30s,5m,1h","refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"interval","useTags":false},{"allValue":null,"auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"5m","value":"5m"},"datasource":"$datasource","hide":2,"includeAll":false,"label":null,"multi":false,"name":"interval","options":[{"selected":true,"text":"4h","value":"4h"}],"query":"4h","refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"interval","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Kubernetes / Networking / Workload","uid":"728bf77cc1166d2f3133bf25846876cc","version":0} \ No newline at end of file diff --git a/monitor/prometheus/v1_Secret_prometheus-community-kube-prometheus.yaml b/monitor/prometheus/v1_Secret_prometheus-community-kube-prometheus.yaml new file mode 100644 index 0000000..7f9e02d --- /dev/null +++ b/monitor/prometheus/v1_Secret_prometheus-community-kube-prometheus.yaml @@ -0,0 +1,18 @@ +# Source: kube-prometheus-stack/templates/prometheus/secret.yaml +apiVersion: v1 +kind: Secret +metadata: + name: prometheus-community-kube-prometheus + namespace: vynil-monitor + labels: + app: kube-prometheus-stack-prometheus + app.kubernetes.io/component: prometheus + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +data: \ No newline at end of file diff --git a/monitor/prometheus/v1_ServiceAccount_prometheus-community-kube-prometheus.yaml b/monitor/prometheus/v1_ServiceAccount_prometheus-community-kube-prometheus.yaml new file mode 100644 index 0000000..8d13b33 --- /dev/null +++ b/monitor/prometheus/v1_ServiceAccount_prometheus-community-kube-prometheus.yaml @@ -0,0 +1,19 @@ +--- +# Source: kube-prometheus-stack/templates/prometheus/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: prometheus-community-kube-prometheus + namespace: vynil-monitor + labels: + app: kube-prometheus-stack-prometheus + app.kubernetes.io/name: kube-prometheus-stack-prometheus + app.kubernetes.io/component: prometheus + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" \ No newline at end of file diff --git a/monitor/prometheus/v1_Service_prometheus-community-kube-coredns.yaml b/monitor/prometheus/v1_Service_prometheus-community-kube-coredns.yaml new file mode 100644 index 0000000..65edb6c --- /dev/null +++ b/monitor/prometheus/v1_Service_prometheus-community-kube-coredns.yaml @@ -0,0 +1,26 @@ +# Source: kube-prometheus-stack/templates/exporters/core-dns/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: prometheus-community-kube-coredns + labels: + app: kube-prometheus-stack-coredns + jobLabel: coredns + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" + namespace: kube-system +spec: + clusterIP: None + ports: + - name: http-metrics + port: 9153 + protocol: TCP + targetPort: 9153 + selector: + k8s-app: kube-dns \ No newline at end of file diff --git a/monitor/prometheus/v1_Service_prometheus-community-kube-kube-proxy.yaml b/monitor/prometheus/v1_Service_prometheus-community-kube-kube-proxy.yaml new file mode 100644 index 0000000..56a65a4 --- /dev/null +++ b/monitor/prometheus/v1_Service_prometheus-community-kube-kube-proxy.yaml @@ -0,0 +1,27 @@ +# Source: kube-prometheus-stack/templates/exporters/kube-proxy/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: prometheus-community-kube-kube-proxy + labels: + app: kube-prometheus-stack-kube-proxy + jobLabel: kube-proxy + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" + namespace: kube-system +spec: + clusterIP: None + ports: + - name: http-metrics + port: 10249 + protocol: TCP + targetPort: 10249 + selector: + k8s-app: kube-proxy + type: ClusterIP \ No newline at end of file diff --git a/monitor/prometheus/v1_Service_prometheus-community-kube-prometheus.yaml b/monitor/prometheus/v1_Service_prometheus-community-kube-prometheus.yaml new file mode 100644 index 0000000..40713bb --- /dev/null +++ b/monitor/prometheus/v1_Service_prometheus-community-kube-prometheus.yaml @@ -0,0 +1,32 @@ +# Source: kube-prometheus-stack/templates/prometheus/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: prometheus-community-kube-prometheus + namespace: vynil-monitor + labels: + app: kube-prometheus-stack-prometheus + self-monitor: "true" + + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: prometheus-community + app.kubernetes.io/version: "56.0.2" + app.kubernetes.io/part-of: kube-prometheus-stack + chart: kube-prometheus-stack-56.0.2 + release: "prometheus-community" + heritage: "Helm" +spec: + ports: + - name: http-web + port: 9090 + targetPort: 9090 + - name: reloader-web + appProtocol: http + port: 8080 + targetPort: reloader-web + publishNotReadyAddresses: false + selector: + app.kubernetes.io/name: prometheus + operator.prometheus.io/name: prometheus-community-kube-prometheus + sessionAffinity: None + type: "ClusterIP" \ No newline at end of file