diff --git a/apps/infisical/configs.tf b/apps/infisical/configs.tf new file mode 100644 index 0000000..94adac9 --- /dev/null +++ b/apps/infisical/configs.tf @@ -0,0 +1,21 @@ +resource "kubectl_manifest" "config" { + yaml_body = <<-EOF + apiVersion: v1 + kind: ConfigMap + metadata: + name: "${var.component}-${var.instance}" + namespace: "${var.namespace}" + labels: ${jsonencode(local.common-labels)} + data: + INVITE_ONLY_SIGNUP: "" + REDIS_URL: "${module.redis.url}" + SITE_URL: "https://${local.dns_name}" + SMTP_FROM_ADDRESS: "" + SMTP_FROM_NAME: "Infisical" + SMTP_HOST: "" + SMTP_PASSWORD: "" + SMTP_PORT: "587" + SMTP_SECURE: "" + SMTP_USERNAME: "" + EOF +} diff --git a/monitor/monitor-control-plan/datas.tf b/apps/infisical/datas.tf similarity index 57% rename from monitor/monitor-control-plan/datas.tf rename to apps/infisical/datas.tf index d850fcd..d0144e6 100644 --- a/monitor/monitor-control-plan/datas.tf +++ b/apps/infisical/datas.tf @@ -1,20 +1,26 @@ locals { + authentik_url = "http://authentik.${var.domain}-auth.svc" + authentik_token = data.kubernetes_secret_v1.authentik.data["AUTHENTIK_BOOTSTRAP_TOKEN"] common-labels = { "vynil.solidite.fr/owner-name" = var.instance "vynil.solidite.fr/owner-namespace" = var.namespace "vynil.solidite.fr/owner-category" = var.category "vynil.solidite.fr/owner-component" = var.component "app.kubernetes.io/managed-by" = "vynil" + "app.kubernetes.io/name" = var.component "app.kubernetes.io/instance" = var.instance } } -data "kustomization_overlay" "data" { - common_labels = local.common-labels - namespace = var.namespace - resources = [for file in fileset(path.module, "*.yaml"): file if file != "index.yaml" && length(regexall("v1_Service_prometheus",file))<1] +data "kubernetes_secret_v1" "authentik" { + metadata { + name = "authentik" + namespace = "${var.domain}-auth" + } } -data "kustomization_overlay" "data_no_ns" { - resources = [for file in fileset(path.module, "*.yaml"): file if length(regexall("v1_Service_prometheus",file))>0] +data "kustomization_overlay" "data" { + namespace = var.namespace + common_labels = local.common-labels + resources = [] } diff --git a/apps/infisical/deploy.tf b/apps/infisical/deploy.tf new file mode 100644 index 0000000..5a5a330 --- /dev/null +++ b/apps/infisical/deploy.tf @@ -0,0 +1,40 @@ +resource "kubectl_manifest" "deploy" { + yaml_body = <<-EOF + apiVersion: apps/v1 + kind: Deployment + metadata: + name: "${var.component}-${var.instance}" + namespace: "${var.namespace}" + labels: ${jsonencode(local.common-labels)} + annotations: + configmap.reloader.stakater.com/reload: "${kubectl_manifest.config.name}" + secret.reloader.stakater.com/reload: "${kubectl_manifest.secret.name}" + spec: + replicas: ${var.replicas} + selector: + matchLabels: ${jsonencode(local.common-labels)} + template: + metadata: + labels: ${jsonencode(local.common-labels)} + spec: + containers: + - name: infisical-backend + image: "${var.images.infiscal.registry}/${var.images.infiscal.repository}:${var.images.infiscal.tag}" + imagePullPolicy: "${var.images.infiscal.pullPolicy}" + readinessProbe: + httpGet: + path: /api/status + port: 8080 + initialDelaySeconds: 10 + periodSeconds: 10 + ports: + - containerPort: 8080 + name: http + protocol: TCP + envFrom: + - configMapRef: + name: "${kubectl_manifest.config.name}" + - secretRef: + name: "${kubectl_manifest.secret.name}" + EOF +} diff --git a/monitor/grafana/index.yaml b/apps/infisical/index.yaml similarity index 50% rename from monitor/grafana/index.yaml rename to apps/infisical/index.yaml index f6bc6b5..6466a6a 100644 --- a/monitor/grafana/index.yaml +++ b/apps/infisical/index.yaml @@ -1,70 +1,66 @@ --- apiVersion: vinyl.solidite.fr/v1beta1 kind: Component -category: monitor +category: apps metadata: - name: grafana + name: infisical description: null options: - domain: - default: your-company + language: + default: fr_FR examples: - - your-company + - fr_FR type: string + sub_domain: + default: to-be-set + examples: + - to-be-set + type: string + replicas: + default: 1 + examples: + - 1 + type: integer images: default: - busybox: - registry: docker.io - repository: library/busybox - tag: 1.31.1 - grafana: + infisical: pullPolicy: IfNotPresent registry: docker.io - repository: grafana/grafana - tag: 10.2.3 - sidecar: + repository: infisical/infisical + tag: v1.0.0 + redis: pullPolicy: IfNotPresent registry: quay.io - repository: kiwigrid/k8s-sidecar - tag: 1.25.2 + repository: opstree/redis + tag: v7.0.12 + redis_exporter: + pullPolicy: IfNotPresent + registry: quay.io + repository: opstree/redis-exporter + tag: v1.44.0 examples: - - busybox: - registry: docker.io - repository: library/busybox - tag: 1.31.1 - grafana: + - infisical: pullPolicy: IfNotPresent registry: docker.io - repository: grafana/grafana - tag: 10.2.3 - sidecar: + repository: infisical/infisical + tag: v1.0.0 + redis: pullPolicy: IfNotPresent registry: quay.io - repository: kiwigrid/k8s-sidecar - tag: 1.25.2 + repository: opstree/redis + tag: v7.0.12 + redis_exporter: + pullPolicy: IfNotPresent + registry: quay.io + repository: opstree/redis-exporter + tag: v1.44.0 properties: - busybox: - default: - registry: docker.io - repository: library/busybox - tag: 1.31.1 - properties: - registry: - default: docker.io - type: string - repository: - default: library/busybox - type: string - tag: - default: 1.31.1 - type: string - type: object - grafana: + infisical: default: pullPolicy: IfNotPresent registry: docker.io - repository: grafana/grafana - tag: 10.2.3 + repository: infisical/infisical + tag: v1.0.0 properties: pullPolicy: default: IfNotPresent @@ -77,18 +73,18 @@ options: default: docker.io type: string repository: - default: grafana/grafana + default: infisical/infisical type: string tag: - default: 10.2.3 + default: v1.0.0 type: string type: object - sidecar: + redis: default: pullPolicy: IfNotPresent registry: quay.io - repository: kiwigrid/k8s-sidecar - tag: 1.25.2 + repository: opstree/redis + tag: v7.0.12 properties: pullPolicy: default: IfNotPresent @@ -101,92 +97,88 @@ options: default: quay.io type: string repository: - default: kiwigrid/k8s-sidecar + default: opstree/redis type: string tag: - default: 1.25.2 + default: v7.0.12 + type: string + type: object + redis_exporter: + default: + pullPolicy: IfNotPresent + registry: quay.io + repository: opstree/redis-exporter + tag: v1.44.0 + properties: + pullPolicy: + default: IfNotPresent + enum: + - Always + - Never + - IfNotPresent + type: string + registry: + default: quay.io + type: string + repository: + default: opstree/redis-exporter + type: string + tag: + default: v1.44.0 type: string type: object type: object - ingress_class: - default: traefik + sso_vynil: + default: true examples: - - traefik + - true + type: boolean + timezone: + default: Europe/Paris + examples: + - Europe/Paris type: string domain_name: - default: your_company.com + default: your-company.com examples: - - your_company.com + - your-company.com + type: string + domain: + default: your-company + examples: + - your-company type: string issuer: default: letsencrypt-prod examples: - letsencrypt-prod type: string - admin_name: - default: grafana_admin + ingress_class: + default: traefik examples: - - grafana_admin - type: string - sub_domain: - default: grafana - examples: - - grafana + - traefik type: string app_group: - default: monitor + default: apps examples: - - monitor + - apps type: string - storage: - default: - volume: - accessMode: ReadWriteOnce - class: '' - size: 10Gi - type: Filesystem - description: Configure this app storage - examples: - - volume: - accessMode: ReadWriteOnce - class: '' - size: 10Gi - type: Filesystem - properties: - volume: - default: - accessMode: ReadWriteOnce - class: '' - size: 10Gi - type: Filesystem - properties: - accessMode: - default: ReadWriteOnce - enum: - - ReadWriteOnce - - ReadOnlyMany - - ReadWriteMany - type: string - class: - default: '' - type: string - size: - default: 10Gi - type: string - type: - default: Filesystem - enum: - - Filesystem - - Block - type: string - type: object - type: object -dependencies: [] +dependencies: +- dist: null + category: core + component: secret-generator +- dist: null + category: dbo + component: mongo +- dist: null + category: dbo + component: redis providers: kubernetes: true authentik: true kubectl: true postgresql: null + mysql: null restapi: true http: true gitea: null diff --git a/monitor/alertmanager/presentation.tf b/apps/infisical/presentation.tf similarity index 80% rename from monitor/alertmanager/presentation.tf rename to apps/infisical/presentation.tf index 6e2aa81..f617770 100644 --- a/monitor/alertmanager/presentation.tf +++ b/apps/infisical/presentation.tf @@ -1,17 +1,22 @@ locals { dns_name = "${var.sub_domain}.${var.domain_name}" dns_names = [local.dns_name] - app_name = var.component == var.instance ? var.instance : format("%s-%s", var.component, var.instance) - icon = "favicon.ico" + icon = "icon.svg" request_headers = { "Content-Type" = "application/json" Authorization = "Bearer ${data.kubernetes_secret_v1.authentik.data["AUTHENTIK_BOOTSTRAP_TOKEN"]}" } - service = { - "name" = "alertmanager-operated" - "port" = { - "number" = 9093 - } +} + +module "service" { + source = "git::https://git.solidite.fr/vynil/kydah-modules.git//service" + component = var.component + instance = var.instance + namespace = var.namespace + labels = local.common-labels + targets = ["http"] + providers = { + kubectl = kubectl } } @@ -25,7 +30,7 @@ module "ingress" { labels = local.common-labels dns_names = local.dns_names middlewares = [module.forward.middleware] - services = [local.service] + services = [module.service.default_definition] providers = { kubectl = kubectl } @@ -63,7 +68,7 @@ module "forward" { ingress_class = var.ingress_class labels = local.common-labels dns_names = local.dns_names - service = local.service + service = module.service.default_definition icon = local.icon request_headers = local.request_headers providers = { diff --git a/apps/infisical/secret.tf b/apps/infisical/secret.tf new file mode 100644 index 0000000..c3cb033 --- /dev/null +++ b/apps/infisical/secret.tf @@ -0,0 +1,31 @@ + +resource "kubectl_manifest" "secret" { + ignore_fields = ["metadata.annotations"] + yaml_body = <<-EOF + apiVersion: "secretgenerator.mittwald.de/v1alpha1" + kind: "StringSecret" + metadata: + name: "${var.component}-${var.instance}" + namespace: "${var.namespace}" + labels: ${jsonencode(local.common-labels)} + spec: + forceRegenerate: false + data: + MONGO_URL: "${module.mongo.url}" + fields: + - fieldName: "JWT_SIGNUP_SECRET" + length: "32" + - fieldName: "JWT_SERVICE_SECRET" + length: "32" + - fieldName: "JWT_REFRESH_SECRET" + length: "32" + - fieldName: "JWT_PROVIDER_AUTH_SECRET" + length: "32" + - fieldName: "JWT_MFA_SECRET" + length: "32" + - fieldName: "JWT_AUTH_SECRET" + length: "32" + - fieldName: "ENCRYPTION_KEY" + length: "32" + EOF +} diff --git a/apps/infisical/storage.tf b/apps/infisical/storage.tf new file mode 100644 index 0000000..0306942 --- /dev/null +++ b/apps/infisical/storage.tf @@ -0,0 +1,21 @@ +module "redis" { + source = "git::https://git.solidite.fr/vynil/kydah-modules.git//redis" + component = var.component + instance = var.instance + namespace = var.namespace + labels = local.common-labels + images = var.images + providers = { + kubectl = kubectl + } +} +module "mongo" { + source = "git::https://git.solidite.fr/vynil/kydah-modules.git//mongo" + component = var.component + instance = var.instance + namespace = var.namespace + labels = local.common-labels + providers = { + kubectl = kubectl + } +} diff --git a/apps/nextcloud/index.yaml b/apps/nextcloud/index.yaml index c2c6708..57c23f2 100644 --- a/apps/nextcloud/index.yaml +++ b/apps/nextcloud/index.yaml @@ -6,21 +6,175 @@ metadata: name: nextcloud description: null options: - ingress_class: - default: traefik + issuer: + default: letsencrypt-prod examples: - - traefik + - letsencrypt-prod type: string - app_group: - default: '' + postgres: + default: + replicas: 1 examples: - - '' - type: string - openid-name: - default: vynil + - replicas: 1 + properties: + replicas: + default: 1 + type: integer + type: object + apps: + default: + audioplayer: false + bookmarks: false + bpm: false + calendar: false + collabora: false + contacts: false + deck: false + groupfolders: true + mindmap: false + music: false + notes: false + onlyoffice: false + passman: false + spreed: false + tables: false + tasks: false + texteditor: true examples: - - vynil - type: string + - audioplayer: false + bookmarks: false + bpm: false + calendar: false + collabora: false + contacts: false + deck: false + groupfolders: true + mindmap: false + music: false + notes: false + onlyoffice: false + passman: false + spreed: false + tables: false + tasks: false + texteditor: true + properties: + audioplayer: + default: false + type: boolean + bookmarks: + default: false + type: boolean + bpm: + default: false + type: boolean + calendar: + default: false + type: boolean + collabora: + default: false + type: boolean + contacts: + default: false + type: boolean + deck: + default: false + type: boolean + groupfolders: + default: true + type: boolean + mindmap: + default: false + type: boolean + music: + default: false + type: boolean + notes: + default: false + type: boolean + onlyoffice: + default: false + type: boolean + passman: + default: false + type: boolean + spreed: + default: false + type: boolean + tables: + default: false + type: boolean + tasks: + default: false + type: boolean + texteditor: + default: true + type: boolean + type: object + storage: + default: + postgres: + size: 5Gi + redis: + size: 2Gi + volume: + accessMode: ReadWriteOnce + class: '' + size: 10Gi + type: Filesystem + description: Configure this app storage + examples: + - postgres: + size: 5Gi + redis: + size: 2Gi + volume: + accessMode: ReadWriteOnce + class: '' + size: 10Gi + type: Filesystem + properties: + postgres: + default: + size: 5Gi + properties: + size: + default: 5Gi + type: string + type: object + redis: + default: + size: 2Gi + properties: + size: + default: 2Gi + type: string + type: object + volume: + default: + accessMode: ReadWriteOnce + class: '' + size: 10Gi + type: Filesystem + properties: + accessMode: + default: ReadWriteOnce + enum: + - ReadWriteOnce + - ReadOnlyMany + - ReadWriteMany + type: string + class: + default: '' + type: string + size: + default: 10Gi + type: string + type: + default: Filesystem + type: string + type: object + type: object backups: default: enable: false @@ -127,182 +281,6 @@ options: default: false type: boolean type: object - issuer: - default: letsencrypt-prod - examples: - - letsencrypt-prod - type: string - storage: - default: - postgres: - size: 5Gi - redis: - size: 2Gi - volume: - accessMode: ReadWriteOnce - class: '' - size: 10Gi - type: Filesystem - description: Configure this app storage - examples: - - postgres: - size: 5Gi - redis: - size: 2Gi - volume: - accessMode: ReadWriteOnce - class: '' - size: 10Gi - type: Filesystem - properties: - postgres: - default: - size: 5Gi - properties: - size: - default: 5Gi - type: string - type: object - redis: - default: - size: 2Gi - properties: - size: - default: 2Gi - type: string - type: object - volume: - default: - accessMode: ReadWriteOnce - class: '' - size: 10Gi - type: Filesystem - properties: - accessMode: - default: ReadWriteOnce - enum: - - ReadWriteOnce - - ReadOnlyMany - - ReadWriteMany - type: string - class: - default: '' - type: string - size: - default: 10Gi - type: string - type: - default: Filesystem - type: string - type: object - type: object - apps: - default: - audioplayer: false - bookmarks: false - bpm: false - calendar: false - collabora: false - contacts: false - deck: false - groupfolders: true - mindmap: false - music: false - notes: false - onlyoffice: false - passman: false - spreed: false - tables: false - tasks: false - texteditor: true - examples: - - audioplayer: false - bookmarks: false - bpm: false - calendar: false - collabora: false - contacts: false - deck: false - groupfolders: true - mindmap: false - music: false - notes: false - onlyoffice: false - passman: false - spreed: false - tables: false - tasks: false - texteditor: true - properties: - audioplayer: - default: false - type: boolean - bookmarks: - default: false - type: boolean - bpm: - default: false - type: boolean - calendar: - default: false - type: boolean - collabora: - default: false - type: boolean - contacts: - default: false - type: boolean - deck: - default: false - type: boolean - groupfolders: - default: true - type: boolean - mindmap: - default: false - type: boolean - music: - default: false - type: boolean - notes: - default: false - type: boolean - onlyoffice: - default: false - type: boolean - passman: - default: false - type: boolean - spreed: - default: false - type: boolean - tables: - default: false - type: boolean - tasks: - default: false - type: boolean - texteditor: - default: true - type: boolean - type: object - redis: - default: - exporter: - enabled: true - examples: - - exporter: - enabled: true - properties: - exporter: - default: - enabled: true - properties: - enabled: - default: true - type: boolean - type: object - type: object images: default: collabora: @@ -558,6 +536,38 @@ options: type: string type: object type: object + redis: + default: + exporter: + enabled: true + examples: + - exporter: + enabled: true + properties: + exporter: + default: + enabled: true + properties: + enabled: + default: true + type: boolean + type: object + type: object + admin: + default: + name: nextcloud_admin + examples: + - name: nextcloud_admin + properties: + name: + default: nextcloud_admin + type: string + type: object + app_group: + default: '' + examples: + - '' + type: string domain_name: default: your_company.com examples: @@ -583,31 +593,21 @@ options: default: 1 type: integer type: object - admin: - default: - name: nextcloud_admin + ingress_class: + default: traefik examples: - - name: nextcloud_admin - properties: - name: - default: nextcloud_admin - type: string - type: object - postgres: - default: - replicas: 1 - examples: - - replicas: 1 - properties: - replicas: - default: 1 - type: integer - type: object + - traefik + type: string domain: default: your-company examples: - your-company type: string + openid-name: + default: vynil + examples: + - vynil + type: string sub_domain: default: files examples: @@ -628,7 +628,8 @@ providers: authentik: true kubectl: true postgresql: null + mysql: null restapi: null http: null gitea: null -tfaddtype: null +tfaddtype: true diff --git a/monitor/alertmanager/datas.tf b/monitor/alertmanager/datas.tf deleted file mode 100644 index b7cc2bc..0000000 --- a/monitor/alertmanager/datas.tf +++ /dev/null @@ -1,167 +0,0 @@ -locals { - authentik_url = "http://authentik.${var.domain}-auth.svc" - authentik_token = data.kubernetes_secret_v1.authentik.data["AUTHENTIK_BOOTSTRAP_TOKEN"] - common-labels = { - "vynil.solidite.fr/owner-name" = var.instance - "vynil.solidite.fr/owner-namespace" = var.namespace - "vynil.solidite.fr/owner-category" = var.category - "vynil.solidite.fr/owner-component" = var.component - "app.kubernetes.io/managed-by" = "vynil" - "app.kubernetes.io/instance" = var.instance - } - rb-patch = <<-EOF - - op: replace - path: /subjects/0/namespace - value: "${var.namespace}" - EOF -} - -data "kubernetes_secret_v1" "authentik" { - metadata { - name = "authentik" - namespace = "${var.domain}-auth" - } -} - -data "kustomization_overlay" "data" { - common_labels = local.common-labels - namespace = var.namespace - resources = [for file in fileset(path.module, "*.yaml"): file if file != "index.yaml" && length(regexall("ClusterRole",file))<1] - patches { - target { - kind = "Alertmanager" - name = "alertmanager-kube-promethe-alertmanager" - } - patch = <<-EOF - apiVersion: monitoring.coreos.com/v1 - kind: Alertmanager - metadata: - name: alertmanager-kube-promethe-alertmanager - spec: - image: "${var.images.alertmanager.registry}/${var.images.alertmanager.repository}:${var.images.alertmanager.tag}" - version: ${var.images.alertmanager.tag} - externalUrl: http://${var.component}-${var.instance}.${var.namespace}:9093 - replicas: ${var.replicas} - listenLocal: ${var.listenLocal} - logLevel: "${var.logLevel}" - retention: "${var.retention}" - EOF - } - patches { - target { - kind = "ConfigMap" - name = "alertmanager-kube-grafana-datasource" - } - patch = <<-EOF - apiVersion: v1 - kind: ConfigMap - metadata: - name: alertmanager-operated - data: - datasource.yaml: |- - apiVersion: 1 - datasources: - - name: Alertmanager - type: alertmanager - uid: alertmanager - url: http://${var.component}-${var.instance}.${var.namespace}:9093/ - access: proxy - jsonData: - handleGrafanaManagedAlerts: false - implementation: prometheus - EOF - } - patches { - target { - kind = "ServiceMonitor" - name = "alertmanager-kube-promethe-alertmanager" - } - patch = <<-EOF - - op: replace - path: /spec/namespaceSelector/matchNames/0 - value: "${var.namespace}" - EOF - } - patches { - target { - kind = "PrometheusRule" - name = "alertmanager-kube-promethe-alertmanager.rules" - } - patch = <<-EOF -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: alertmanager-kube-promethe-alertmanager.rules -spec: - groups: - - name: alertmanager.rules - rules: - - alert: AlertmanagerFailedReload - expr: |- - # Without max_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - max_over_time(alertmanager_config_last_reload_successful{job="alertmanager-kube-promethe-alertmanager",namespace="${var.namespace}"}[5m]) == 0 - - alert: AlertmanagerMembersInconsistent - expr: |- - # Without max_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - max_over_time(alertmanager_cluster_members{job="alertmanager-kube-promethe-alertmanager",namespace="${var.namespace}"}[5m]) - < on (namespace,service,cluster) group_left - count by (namespace,service,cluster) (max_over_time(alertmanager_cluster_members{job="alertmanager-kube-promethe-alertmanager",namespace="${var.namespace}"}[5m])) - - alert: AlertmanagerFailedToSendAlerts - expr: |- - ( - rate(alertmanager_notifications_failed_total{job="alertmanager-kube-promethe-alertmanager",namespace="${var.namespace}"}[5m]) - / - ignoring (reason) group_left rate(alertmanager_notifications_total{job="alertmanager-kube-promethe-alertmanager",namespace="${var.namespace}"}[5m]) - ) - > 0.01 - - alert: AlertmanagerClusterFailedToSendAlerts - expr: |- - min by (namespace,service, integration) ( - rate(alertmanager_notifications_failed_total{job="alertmanager-kube-promethe-alertmanager",namespace="${var.namespace}", integration=~`.*`}[5m]) - / - ignoring (reason) group_left rate(alertmanager_notifications_total{job="alertmanager-kube-promethe-alertmanager",namespace="${var.namespace}", integration=~`.*`}[5m]) - ) - > 0.01 - - alert: AlertmanagerClusterFailedToSendAlerts - expr: |- - min by (namespace,service, integration) ( - rate(alertmanager_notifications_failed_total{job="alertmanager-kube-promethe-alertmanager",namespace="${var.namespace}", integration!~`.*`}[5m]) - / - ignoring (reason) group_left rate(alertmanager_notifications_total{job="alertmanager-kube-promethe-alertmanager",namespace="${var.namespace}", integration!~`.*`}[5m]) - ) - > 0.01 - - alert: AlertmanagerConfigInconsistent - expr: |- - count by (namespace,service,cluster) ( - count_values by (namespace,service,cluster) ("config_hash", alertmanager_config_hash{job="alertmanager-kube-promethe-alertmanager",namespace="${var.namespace}"}) - ) - != 1 - - alert: AlertmanagerClusterDown - expr: |- - ( - count by (namespace,service,cluster) ( - avg_over_time(up{job="alertmanager-kube-promethe-alertmanager",namespace="${var.namespace}"}[5m]) < 0.5 - ) - / - count by (namespace,service,cluster) ( - up{job="alertmanager-kube-promethe-alertmanager",namespace="${var.namespace}"} - ) - ) - >= 0.5 - - alert: AlertmanagerClusterCrashlooping - expr: |- - ( - count by (namespace,service,cluster) ( - changes(process_start_time_seconds{job="alertmanager-kube-promethe-alertmanager",namespace="${var.namespace}"}[10m]) > 4 - ) - / - count by (namespace,service,cluster) ( - up{job="alertmanager-kube-promethe-alertmanager",namespace="${var.namespace}"} - ) - ) - >= 0.5 - EOF - } -} diff --git a/monitor/alertmanager/index.yaml b/monitor/alertmanager/index.yaml deleted file mode 100644 index dcd8dfa..0000000 --- a/monitor/alertmanager/index.yaml +++ /dev/null @@ -1,110 +0,0 @@ ---- -apiVersion: vinyl.solidite.fr/v1beta1 -kind: Component -category: monitor -metadata: - name: alertmanager - description: null -options: - replicas: - default: 1 - examples: - - 1 - type: integer - app_group: - default: monitor - examples: - - monitor - type: string - listenLocal: - default: false - examples: - - false - type: boolean - domain_name: - default: your_company.com - examples: - - your_company.com - type: string - domain: - default: your-company - examples: - - your-company - type: string - images: - default: - alertmanager: - pullPolicy: IfNotPresent - registry: quay.io - repository: prometheus/alertmanager - tag: v0.26.0 - examples: - - alertmanager: - pullPolicy: IfNotPresent - registry: quay.io - repository: prometheus/alertmanager - tag: v0.26.0 - properties: - alertmanager: - default: - pullPolicy: IfNotPresent - registry: quay.io - repository: prometheus/alertmanager - tag: v0.26.0 - properties: - pullPolicy: - default: IfNotPresent - enum: - - Always - - Never - - IfNotPresent - type: string - registry: - default: quay.io - type: string - repository: - default: prometheus/alertmanager - type: string - tag: - default: v0.26.0 - type: string - type: object - type: object - logLevel: - default: info - examples: - - info - type: string - ingress_class: - default: traefik - examples: - - traefik - type: string - sub_domain: - default: alertmanager - examples: - - alertmanager - type: string - retention: - default: 120h - examples: - - 120h - type: string - issuer: - default: letsencrypt-prod - examples: - - letsencrypt-prod - type: string -dependencies: -- dist: null - category: share - component: authentik-forward -providers: - kubernetes: true - authentik: true - kubectl: true - postgresql: null - restapi: true - http: true - gitea: null -tfaddtype: null diff --git a/monitor/alertmanager/monitoring.coreos.com_v1_Alertmanager_alertmanager-kube-promethe-alertmanager.yaml b/monitor/alertmanager/monitoring.coreos.com_v1_Alertmanager_alertmanager-kube-promethe-alertmanager.yaml deleted file mode 100644 index 57e40c6..0000000 --- a/monitor/alertmanager/monitoring.coreos.com_v1_Alertmanager_alertmanager-kube-promethe-alertmanager.yaml +++ /dev/null @@ -1,38 +0,0 @@ -# Source: kube-prometheus-stack/templates/alertmanager/alertmanager.yaml -apiVersion: monitoring.coreos.com/v1 -kind: Alertmanager -metadata: - name: alertmanager-kube-promethe-alertmanager - namespace: vynil-monitor - labels: - app: kube-prometheus-stack-alertmanager - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: alertmanager - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "alertmanager" - heritage: "Helm" -spec: - image: "quay.io/prometheus/alertmanager:v0.26.0" - version: v0.26.0 - replicas: 1 - listenLocal: false - serviceAccountName: alertmanager-kube-promethe-alertmanager - externalUrl: http://alertmanager-kube-promethe-alertmanager.vynil-monitor:9093 - paused: false - logFormat: "logfmt" - logLevel: "info" - retention: "120h" - alertmanagerConfigSelector: {} - alertmanagerConfigNamespaceSelector: {} - routePrefix: "/" - securityContext: - fsGroup: 2000 - runAsGroup: 2000 - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - portName: http-web \ No newline at end of file diff --git a/monitor/alertmanager/monitoring.coreos.com_v1_PrometheusRule_alertmanager-kube-promethe-alertmanager.rules.yaml b/monitor/alertmanager/monitoring.coreos.com_v1_PrometheusRule_alertmanager-kube-promethe-alertmanager.rules.yaml deleted file mode 100644 index 6d8850a..0000000 --- a/monitor/alertmanager/monitoring.coreos.com_v1_PrometheusRule_alertmanager-kube-promethe-alertmanager.rules.yaml +++ /dev/null @@ -1,142 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/alertmanager.rules.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: alertmanager-kube-promethe-alertmanager.rules - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: alertmanager - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "alertmanager" - heritage: "Helm" -spec: - groups: - - name: alertmanager.rules - rules: - - alert: AlertmanagerFailedReload - annotations: - description: Configuration has failed to load for {{ $labels.namespace }}/{{ $labels.pod}}. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerfailedreload - summary: Reloading an Alertmanager configuration has failed. - expr: |- - # Without max_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - max_over_time(alertmanager_config_last_reload_successful{job="alertmanager-kube-promethe-alertmanager",namespace="vynil-monitor"}[5m]) == 0 - for: 10m - labels: - severity: critical - - alert: AlertmanagerMembersInconsistent - annotations: - description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} has only found {{ $value }} members of the {{$labels.job}} cluster. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagermembersinconsistent - summary: A member of an Alertmanager cluster has not found all other cluster members. - expr: |- - # Without max_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - max_over_time(alertmanager_cluster_members{job="alertmanager-kube-promethe-alertmanager",namespace="vynil-monitor"}[5m]) - < on (namespace,service,cluster) group_left - count by (namespace,service,cluster) (max_over_time(alertmanager_cluster_members{job="alertmanager-kube-promethe-alertmanager",namespace="vynil-monitor"}[5m])) - for: 15m - labels: - severity: critical - - alert: AlertmanagerFailedToSendAlerts - annotations: - description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} failed to send {{ $value | humanizePercentage }} of notifications to {{ $labels.integration }}. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerfailedtosendalerts - summary: An Alertmanager instance failed to send notifications. - expr: |- - ( - rate(alertmanager_notifications_failed_total{job="alertmanager-kube-promethe-alertmanager",namespace="vynil-monitor"}[5m]) - / - ignoring (reason) group_left rate(alertmanager_notifications_total{job="alertmanager-kube-promethe-alertmanager",namespace="vynil-monitor"}[5m]) - ) - > 0.01 - for: 5m - labels: - severity: warning - - alert: AlertmanagerClusterFailedToSendAlerts - annotations: - description: The minimum notification failure rate to {{ $labels.integration }} sent from any instance in the {{$labels.job}} cluster is {{ $value | humanizePercentage }}. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterfailedtosendalerts - summary: All Alertmanager instances in a cluster failed to send notifications to a critical integration. - expr: |- - min by (namespace,service, integration) ( - rate(alertmanager_notifications_failed_total{job="alertmanager-kube-promethe-alertmanager",namespace="vynil-monitor", integration=~`.*`}[5m]) - / - ignoring (reason) group_left rate(alertmanager_notifications_total{job="alertmanager-kube-promethe-alertmanager",namespace="vynil-monitor", integration=~`.*`}[5m]) - ) - > 0.01 - for: 5m - labels: - severity: critical - - alert: AlertmanagerClusterFailedToSendAlerts - annotations: - description: The minimum notification failure rate to {{ $labels.integration }} sent from any instance in the {{$labels.job}} cluster is {{ $value | humanizePercentage }}. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterfailedtosendalerts - summary: All Alertmanager instances in a cluster failed to send notifications to a non-critical integration. - expr: |- - min by (namespace,service, integration) ( - rate(alertmanager_notifications_failed_total{job="alertmanager-kube-promethe-alertmanager",namespace="vynil-monitor", integration!~`.*`}[5m]) - / - ignoring (reason) group_left rate(alertmanager_notifications_total{job="alertmanager-kube-promethe-alertmanager",namespace="vynil-monitor", integration!~`.*`}[5m]) - ) - > 0.01 - for: 5m - labels: - severity: warning - - alert: AlertmanagerConfigInconsistent - annotations: - description: Alertmanager instances within the {{$labels.job}} cluster have different configurations. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerconfiginconsistent - summary: Alertmanager instances within the same cluster have different configurations. - expr: |- - count by (namespace,service,cluster) ( - count_values by (namespace,service,cluster) ("config_hash", alertmanager_config_hash{job="alertmanager-kube-promethe-alertmanager",namespace="vynil-monitor"}) - ) - != 1 - for: 20m - labels: - severity: critical - - alert: AlertmanagerClusterDown - annotations: - description: '{{ $value | humanizePercentage }} of Alertmanager instances within the {{$labels.job}} cluster have been up for less than half of the last 5m.' - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterdown - summary: Half or more of the Alertmanager instances within the same cluster are down. - expr: |- - ( - count by (namespace,service,cluster) ( - avg_over_time(up{job="alertmanager-kube-promethe-alertmanager",namespace="vynil-monitor"}[5m]) < 0.5 - ) - / - count by (namespace,service,cluster) ( - up{job="alertmanager-kube-promethe-alertmanager",namespace="vynil-monitor"} - ) - ) - >= 0.5 - for: 5m - labels: - severity: critical - - alert: AlertmanagerClusterCrashlooping - annotations: - description: '{{ $value | humanizePercentage }} of Alertmanager instances within the {{$labels.job}} cluster have restarted at least 5 times in the last 10m.' - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclustercrashlooping - summary: Half or more of the Alertmanager instances within the same cluster are crashlooping. - expr: |- - ( - count by (namespace,service,cluster) ( - changes(process_start_time_seconds{job="alertmanager-kube-promethe-alertmanager",namespace="vynil-monitor"}[10m]) > 4 - ) - / - count by (namespace,service,cluster) ( - up{job="alertmanager-kube-promethe-alertmanager",namespace="vynil-monitor"} - ) - ) - >= 0.5 - for: 5m - labels: - severity: critical \ No newline at end of file diff --git a/monitor/alertmanager/monitoring.coreos.com_v1_ServiceMonitor_alertmanager-kube-promethe-alertmanager.yaml b/monitor/alertmanager/monitoring.coreos.com_v1_ServiceMonitor_alertmanager-kube-promethe-alertmanager.yaml deleted file mode 100644 index 6954e15..0000000 --- a/monitor/alertmanager/monitoring.coreos.com_v1_ServiceMonitor_alertmanager-kube-promethe-alertmanager.yaml +++ /dev/null @@ -1,33 +0,0 @@ -# Source: kube-prometheus-stack/templates/alertmanager/servicemonitor.yaml -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: alertmanager-kube-promethe-alertmanager - namespace: vynil-monitor - labels: - app: kube-prometheus-stack-alertmanager - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: alertmanager - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "alertmanager" - heritage: "Helm" -spec: - - selector: - matchLabels: - app: kube-prometheus-stack-alertmanager - release: "alertmanager" - self-monitor: "true" - namespaceSelector: - matchNames: - - "vynil-monitor" - endpoints: - - port: http-web - enableHttp2: true - path: "/metrics" - - port: reloader-web - scheme: http - path: "/metrics" \ No newline at end of file diff --git a/monitor/alertmanager/svc.tf b/monitor/alertmanager/svc.tf deleted file mode 100644 index c5220bb..0000000 --- a/monitor/alertmanager/svc.tf +++ /dev/null @@ -1,33 +0,0 @@ -locals { - svc-label = merge(local.common-labels, { - "app" = "kube-prometheus-stack-prometheus" - "release" = "prometheus" - "self-monitor" = "true" - - }) -} -resource "kubectl_manifest" "svc" { - yaml_body = <<-EOF - apiVersion: v1 - kind: Service - metadata: - name: "${var.component}-${var.instance}" - namespace: "${var.namespace}" - labels: ${jsonencode(local.svc-label)} - spec: - ports: - - name: http-web - port: 9093 - targetPort: 9093 - protocol: TCP - - name: reloader-web - appProtocol: http - port: 8080 - targetPort: reloader-web - selector: - app.kubernetes.io/name: alertmanager - alertmanager: alertmanager-kube-promethe-alertmanager - sessionAffinity: None - type: "ClusterIP" - EOF -} diff --git a/monitor/alertmanager/v1_ConfigMap_alertmanager-kube-grafana-datasource.yaml b/monitor/alertmanager/v1_ConfigMap_alertmanager-kube-grafana-datasource.yaml deleted file mode 100644 index be5fdf9..0000000 --- a/monitor/alertmanager/v1_ConfigMap_alertmanager-kube-grafana-datasource.yaml +++ /dev/null @@ -1,19 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: alertmanager-kube-grafana-datasource - labels: - grafana_datasource: "1" - app: alertmanager -data: - datasource.yaml: |- - apiVersion: 1 - datasources: - - name: Alertmanager - type: alertmanager - uid: alertmanager - url: http://alertmanager-operated.vynil-monitor:9093/ - access: proxy - jsonData: - handleGrafanaManagedAlerts: false - implementation: prometheus \ No newline at end of file diff --git a/monitor/alertmanager/v1_ConfigMap_alertmanager-kube-promethe-alertmanager-overview.yaml b/monitor/alertmanager/v1_ConfigMap_alertmanager-kube-promethe-alertmanager-overview.yaml deleted file mode 100644 index bd706b9..0000000 --- a/monitor/alertmanager/v1_ConfigMap_alertmanager-kube-promethe-alertmanager-overview.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/alertmanager-overview.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - namespace: vynil-monitor - name: alertmanager-kube-promethe-alertmanager-overview - annotations: - {} - labels: - grafana_dashboard: "1" - app: kube-prometheus-stack-grafana - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: alertmanager - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "alertmanager" - heritage: "Helm" -data: - alertmanager-overview.json: |- - {"__inputs":[],"__requires":[],"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":1,"hideControls":false,"id":null,"links":[],"refresh":"30s","rows":[{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","description":"current set of alerts stored in the Alertmanager","fill":1,"fillGradient":0,"gridPos":{},"id":2,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":false,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(alertmanager_alerts{namespace=~\"$namespace\",service=~\"$service\"}) by (namespace,service,instance)","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Alerts","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"none","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"none","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","description":"rate of successful and invalid alerts received by the Alertmanager","fill":1,"fillGradient":0,"gridPos":{},"id":3,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":false,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(rate(alertmanager_alerts_received_total{namespace=~\"$namespace\",service=~\"$service\"}[$__rate_interval])) by (namespace,service,instance)","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} Received","refId":"A"},{"expr":"sum(rate(alertmanager_alerts_invalid_total{namespace=~\"$namespace\",service=~\"$service\"}[$__rate_interval])) by (namespace,service,instance)","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} Invalid","refId":"B"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Alerts receive rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Alerts","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","description":"rate of successful and invalid notifications sent by the Alertmanager","fill":1,"fillGradient":0,"gridPos":{},"id":4,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":false,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":"integration","seriesOverrides":[],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(rate(alertmanager_notifications_total{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (integration,namespace,service,instance)","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} Total","refId":"A"},{"expr":"sum(rate(alertmanager_notifications_failed_total{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (integration,namespace,service,instance)","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} Failed","refId":"B"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"$integration: Notifications Send Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","description":"latency of notifications sent by the Alertmanager","fill":1,"fillGradient":0,"gridPos":{},"id":5,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":false,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":"integration","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99,\n sum(rate(alertmanager_notification_latency_seconds_bucket{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (le,namespace,service,instance)\n) \n","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} 99th Percentile","refId":"A"},{"expr":"histogram_quantile(0.50,\n sum(rate(alertmanager_notification_latency_seconds_bucket{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (le,namespace,service,instance)\n) \n","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} Median","refId":"B"},{"expr":"sum(rate(alertmanager_notification_latency_seconds_sum{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (namespace,service,instance)\n/\nsum(rate(alertmanager_notification_latency_seconds_count{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (namespace,service,instance)\n","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} Average","refId":"C"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"$integration: Notification Duration","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Notifications","titleSize":"h6","type":"row"}],"schemaVersion":14,"style":"dark","tags":["alertmanager-mixin"],"templating":{"list":[{"current":{"text":"Prometheus","value":"Prometheus"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"","value":""},"datasource":"$datasource","hide":0,"includeAll":false,"label":"namespace","multi":false,"name":"namespace","options":[],"query":"label_values(alertmanager_alerts, namespace)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"","value":""},"datasource":"$datasource","hide":0,"includeAll":false,"label":"service","multi":false,"name":"service","options":[],"query":"label_values(alertmanager_alerts, service)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"all","value":"$__all"},"datasource":"$datasource","hide":2,"includeAll":true,"label":null,"multi":false,"name":"integration","options":[],"query":"label_values(alertmanager_notifications_total{integration=~\".*\"}, integration)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Alertmanager / Overview","uid":"alertmanager-overview","version":0} \ No newline at end of file diff --git a/monitor/alertmanager/v1_Secret_alertmanager-alertmanager-kube-promethe-alertmanager.yaml b/monitor/alertmanager/v1_Secret_alertmanager-alertmanager-kube-promethe-alertmanager.yaml deleted file mode 100644 index 86c3471..0000000 --- a/monitor/alertmanager/v1_Secret_alertmanager-alertmanager-kube-promethe-alertmanager.yaml +++ /dev/null @@ -1,18 +0,0 @@ -# Source: kube-prometheus-stack/templates/alertmanager/secret.yaml -apiVersion: v1 -kind: Secret -metadata: - name: alertmanager-alertmanager-kube-promethe-alertmanager - namespace: vynil-monitor - labels: - app: kube-prometheus-stack-alertmanager - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: alertmanager - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "alertmanager" - heritage: "Helm" -data: - alertmanager.yaml: "Z2xvYmFsOgogIHJlc29sdmVfdGltZW91dDogNW0KaW5oaWJpdF9ydWxlczoKLSBlcXVhbDoKICAtIG5hbWVzcGFjZQogIC0gYWxlcnRuYW1lCiAgc291cmNlX21hdGNoZXJzOgogIC0gc2V2ZXJpdHkgPSBjcml0aWNhbAogIHRhcmdldF9tYXRjaGVyczoKICAtIHNldmVyaXR5ID1+IHdhcm5pbmd8aW5mbwotIGVxdWFsOgogIC0gbmFtZXNwYWNlCiAgLSBhbGVydG5hbWUKICBzb3VyY2VfbWF0Y2hlcnM6CiAgLSBzZXZlcml0eSA9IHdhcm5pbmcKICB0YXJnZXRfbWF0Y2hlcnM6CiAgLSBzZXZlcml0eSA9IGluZm8KLSBlcXVhbDoKICAtIG5hbWVzcGFjZQogIHNvdXJjZV9tYXRjaGVyczoKICAtIGFsZXJ0bmFtZSA9IEluZm9JbmhpYml0b3IKICB0YXJnZXRfbWF0Y2hlcnM6CiAgLSBzZXZlcml0eSA9IGluZm8KLSB0YXJnZXRfbWF0Y2hlcnM6CiAgLSBhbGVydG5hbWUgPSBJbmZvSW5oaWJpdG9yCnJlY2VpdmVyczoKLSBuYW1lOiAibnVsbCIKcm91dGU6CiAgZ3JvdXBfYnk6CiAgLSBuYW1lc3BhY2UKICBncm91cF9pbnRlcnZhbDogNW0KICBncm91cF93YWl0OiAzMHMKICByZWNlaXZlcjogIm51bGwiCiAgcmVwZWF0X2ludGVydmFsOiAxMmgKICByb3V0ZXM6CiAgLSBtYXRjaGVyczoKICAgIC0gYWxlcnRuYW1lID0gIldhdGNoZG9nIgogICAgcmVjZWl2ZXI6ICJudWxsIgp0ZW1wbGF0ZXM6Ci0gL2V0Yy9hbGVydG1hbmFnZXIvY29uZmlnLyoudG1wbA==" \ No newline at end of file diff --git a/monitor/alertmanager/v1_ServiceAccount_alertmanager-kube-promethe-alertmanager.yaml b/monitor/alertmanager/v1_ServiceAccount_alertmanager-kube-promethe-alertmanager.yaml deleted file mode 100644 index 5c9a310..0000000 --- a/monitor/alertmanager/v1_ServiceAccount_alertmanager-kube-promethe-alertmanager.yaml +++ /dev/null @@ -1,20 +0,0 @@ ---- -# Source: kube-prometheus-stack/templates/alertmanager/serviceaccount.yaml -apiVersion: v1 -kind: ServiceAccount -metadata: - name: alertmanager-kube-promethe-alertmanager - namespace: vynil-monitor - labels: - app: kube-prometheus-stack-alertmanager - app.kubernetes.io/name: kube-prometheus-stack-alertmanager - app.kubernetes.io/component: alertmanager - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: alertmanager - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "alertmanager" - heritage: "Helm" -automountServiceAccountToken: true \ No newline at end of file diff --git a/monitor/alerts-containers/datas.tf b/monitor/alerts-containers/datas.tf deleted file mode 100644 index 9797980..0000000 --- a/monitor/alerts-containers/datas.tf +++ /dev/null @@ -1,16 +0,0 @@ -locals { - common-labels = { - "vynil.solidite.fr/owner-name" = var.instance - "vynil.solidite.fr/owner-namespace" = var.namespace - "vynil.solidite.fr/owner-category" = var.category - "vynil.solidite.fr/owner-component" = var.component - "app.kubernetes.io/managed-by" = "vynil" - "app.kubernetes.io/instance" = var.instance - } -} - -data "kustomization_overlay" "data" { - common_labels = local.common-labels - namespace = var.namespace - resources = [for file in fileset(path.module, "*.yaml"): file if file != "index.yaml"] -} diff --git a/monitor/alerts-containers/index.yaml b/monitor/alerts-containers/index.yaml deleted file mode 100644 index d81fe2e..0000000 --- a/monitor/alerts-containers/index.yaml +++ /dev/null @@ -1,23 +0,0 @@ ---- -apiVersion: vinyl.solidite.fr/v1beta1 -kind: Component -category: monitor -metadata: - name: alerts-containers - description: null -options: - useless: - default: true - examples: - - true - type: boolean -dependencies: [] -providers: - kubernetes: true - authentik: null - kubectl: true - postgresql: null - restapi: null - http: null - gitea: null -tfaddtype: null diff --git a/monitor/alerts-containers/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-cpu-usage-seconds.yaml b/monitor/alerts-containers/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-cpu-usage-seconds.yaml deleted file mode 100644 index 930463c..0000000 --- a/monitor/alerts-containers/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-cpu-usage-seconds.yaml +++ /dev/null @@ -1,28 +0,0 @@ ---- -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_cpu_usage_seconds_total.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: prometheus-community-kube-k8s.rules.container-cpu-usage-seconds - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -spec: - groups: - - name: k8s.rules.container_cpu_usage_seconds_total - rules: - - expr: |- - sum by (cluster, namespace, pod, container) ( - irate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}[5m]) - ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( - 1, max by (cluster, namespace, pod, node) (kube_pod_info{node!=""}) - ) - record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate \ No newline at end of file diff --git a/monitor/alerts-containers/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-cache.yaml b/monitor/alerts-containers/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-cache.yaml deleted file mode 100644 index 6ee5179..0000000 --- a/monitor/alerts-containers/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-cache.yaml +++ /dev/null @@ -1,26 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_memory_cache.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: prometheus-community-kube-k8s.rules.container-memory-cache - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -spec: - groups: - - name: k8s.rules.container_memory_cache - rules: - - expr: |- - container_memory_cache{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} - * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) (1, - max by (cluster, namespace, pod, node) (kube_pod_info{node!=""}) - ) - record: node_namespace_pod_container:container_memory_cache \ No newline at end of file diff --git a/monitor/alerts-containers/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-rss.yaml b/monitor/alerts-containers/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-rss.yaml deleted file mode 100644 index f513eea..0000000 --- a/monitor/alerts-containers/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-rss.yaml +++ /dev/null @@ -1,26 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_memory_rss.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: prometheus-community-kube-k8s.rules.container-memory-rss - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -spec: - groups: - - name: k8s.rules.container_memory_rss - rules: - - expr: |- - container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} - * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) (1, - max by (cluster, namespace, pod, node) (kube_pod_info{node!=""}) - ) - record: node_namespace_pod_container:container_memory_rss \ No newline at end of file diff --git a/monitor/alerts-containers/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-swap.yaml b/monitor/alerts-containers/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-swap.yaml deleted file mode 100644 index b204e46..0000000 --- a/monitor/alerts-containers/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-swap.yaml +++ /dev/null @@ -1,26 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_memory_swap.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: prometheus-community-kube-k8s.rules.container-memory-swap - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -spec: - groups: - - name: k8s.rules.container_memory_swap - rules: - - expr: |- - container_memory_swap{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} - * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) (1, - max by (cluster, namespace, pod, node) (kube_pod_info{node!=""}) - ) - record: node_namespace_pod_container:container_memory_swap \ No newline at end of file diff --git a/monitor/alerts-containers/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-working-se.yaml b/monitor/alerts-containers/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-working-se.yaml deleted file mode 100644 index 038cc75..0000000 --- a/monitor/alerts-containers/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-memory-working-se.yaml +++ /dev/null @@ -1,26 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_memory_working_set_bytes.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: prometheus-community-kube-k8s.rules.container-memory-working-se - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -spec: - groups: - - name: k8s.rules.container_memory_working_set_bytes - rules: - - expr: |- - container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} - * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) (1, - max by (cluster, namespace, pod, node) (kube_pod_info{node!=""}) - ) - record: node_namespace_pod_container:container_memory_working_set_bytes \ No newline at end of file diff --git a/monitor/alerts-containers/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-resource.yaml b/monitor/alerts-containers/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-resource.yaml deleted file mode 100644 index e493c25..0000000 --- a/monitor/alerts-containers/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.container-resource.yaml +++ /dev/null @@ -1,88 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_resource.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: prometheus-community-kube-k8s.rules.container-resource - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -spec: - groups: - - name: k8s.rules.container_resource - rules: - - expr: |- - kube_pod_container_resource_requests{resource="memory",job="kube-state-metrics"} * on (namespace, pod, cluster) - group_left() max by (namespace, pod, cluster) ( - (kube_pod_status_phase{phase=~"Pending|Running"} == 1) - ) - record: cluster:namespace:pod_memory:active:kube_pod_container_resource_requests - - expr: |- - sum by (namespace, cluster) ( - sum by (namespace, pod, cluster) ( - max by (namespace, pod, container, cluster) ( - kube_pod_container_resource_requests{resource="memory",job="kube-state-metrics"} - ) * on (namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( - kube_pod_status_phase{phase=~"Pending|Running"} == 1 - ) - ) - ) - record: namespace_memory:kube_pod_container_resource_requests:sum - - expr: |- - kube_pod_container_resource_requests{resource="cpu",job="kube-state-metrics"} * on (namespace, pod, cluster) - group_left() max by (namespace, pod, cluster) ( - (kube_pod_status_phase{phase=~"Pending|Running"} == 1) - ) - record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests - - expr: |- - sum by (namespace, cluster) ( - sum by (namespace, pod, cluster) ( - max by (namespace, pod, container, cluster) ( - kube_pod_container_resource_requests{resource="cpu",job="kube-state-metrics"} - ) * on (namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( - kube_pod_status_phase{phase=~"Pending|Running"} == 1 - ) - ) - ) - record: namespace_cpu:kube_pod_container_resource_requests:sum - - expr: |- - kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"} * on (namespace, pod, cluster) - group_left() max by (namespace, pod, cluster) ( - (kube_pod_status_phase{phase=~"Pending|Running"} == 1) - ) - record: cluster:namespace:pod_memory:active:kube_pod_container_resource_limits - - expr: |- - sum by (namespace, cluster) ( - sum by (namespace, pod, cluster) ( - max by (namespace, pod, container, cluster) ( - kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"} - ) * on (namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( - kube_pod_status_phase{phase=~"Pending|Running"} == 1 - ) - ) - ) - record: namespace_memory:kube_pod_container_resource_limits:sum - - expr: |- - kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"} * on (namespace, pod, cluster) - group_left() max by (namespace, pod, cluster) ( - (kube_pod_status_phase{phase=~"Pending|Running"} == 1) - ) - record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits - - expr: |- - sum by (namespace, cluster) ( - sum by (namespace, pod, cluster) ( - max by (namespace, pod, container, cluster) ( - kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"} - ) * on (namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( - kube_pod_status_phase{phase=~"Pending|Running"} == 1 - ) - ) - ) - record: namespace_cpu:kube_pod_container_resource_limits:sum \ No newline at end of file diff --git a/monitor/alerts-core/datas.tf b/monitor/alerts-core/datas.tf deleted file mode 100644 index 9797980..0000000 --- a/monitor/alerts-core/datas.tf +++ /dev/null @@ -1,16 +0,0 @@ -locals { - common-labels = { - "vynil.solidite.fr/owner-name" = var.instance - "vynil.solidite.fr/owner-namespace" = var.namespace - "vynil.solidite.fr/owner-category" = var.category - "vynil.solidite.fr/owner-component" = var.component - "app.kubernetes.io/managed-by" = "vynil" - "app.kubernetes.io/instance" = var.instance - } -} - -data "kustomization_overlay" "data" { - common_labels = local.common-labels - namespace = var.namespace - resources = [for file in fileset(path.module, "*.yaml"): file if file != "index.yaml"] -} diff --git a/monitor/alerts-core/index.yaml b/monitor/alerts-core/index.yaml deleted file mode 100644 index 6663b77..0000000 --- a/monitor/alerts-core/index.yaml +++ /dev/null @@ -1,23 +0,0 @@ ---- -apiVersion: vinyl.solidite.fr/v1beta1 -kind: Component -category: monitor -metadata: - name: alerts-core - description: null -options: - useless: - default: true - examples: - - true - type: boolean -dependencies: [] -providers: - kubernetes: true - authentik: null - kubectl: true - postgresql: null - restapi: null - http: null - gitea: null -tfaddtype: null diff --git a/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-config-reloaders.yaml b/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-config-reloaders.yaml deleted file mode 100644 index 989f4a9..0000000 --- a/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-config-reloaders.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/config-reloaders.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: prometheus-community-kube-config-reloaders - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -spec: - groups: - - name: config-reloaders - rules: - - alert: ConfigReloaderSidecarErrors - annotations: - description: 'Errors encountered while the {{$labels.pod}} config-reloader sidecar attempts to sync config in {{$labels.namespace}} namespace. - - As a result, configuration for service running in {{$labels.pod}} may be stale and cannot be updated anymore.' - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/configreloadersidecarerrors - summary: config-reloader sidecar has not had a successful reload for 10m - expr: max_over_time(reloader_last_reload_successful{namespace=~".+"}[5m]) == 0 - for: 10m - labels: - severity: warning \ No newline at end of file diff --git a/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-general.rules.yaml b/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-general.rules.yaml deleted file mode 100644 index 65fc0e0..0000000 --- a/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-general.rules.yaml +++ /dev/null @@ -1,67 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/general.rules.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: prometheus-community-kube-general.rules - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -spec: - groups: - - name: general.rules - rules: - - alert: TargetDown - annotations: - description: '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service }} targets in {{ $labels.namespace }} namespace are down.' - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/targetdown - summary: One or more targets are unreachable. - expr: 100 * (count(up == 0) BY (cluster, job, namespace, service) / count(up) BY (cluster, job, namespace, service)) > 10 - for: 10m - labels: - severity: warning - - alert: Watchdog - annotations: - description: 'This is an alert meant to ensure that the entire alerting pipeline is functional. - - This alert is always firing, therefore it should always be firing in Alertmanager - - and always fire against a receiver. There are integrations with various notification - - mechanisms that send a notification when this alert is not firing. For example the - - "DeadMansSnitch" integration in PagerDuty. - - ' - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/watchdog - summary: An alert that should always be firing to certify that Alertmanager is working properly. - expr: vector(1) - labels: - severity: none - - alert: InfoInhibitor - annotations: - description: 'This is an alert that is used to inhibit info alerts. - - By themselves, the info-level alerts are sometimes very noisy, but they are relevant when combined with - - other alerts. - - This alert fires whenever there''s a severity="info" alert, and stops firing when another alert with a - - severity of ''warning'' or ''critical'' starts firing on the same namespace. - - This alert should be routed to a null receiver and configured to inhibit alerts with severity="info". - - ' - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/infoinhibitor - summary: Info-level alert inhibition. - expr: ALERTS{severity = "info"} == 1 unless on (namespace) ALERTS{alertname != "InfoInhibitor", severity =~ "warning|critical", alertstate="firing"} == 1 - labels: - severity: none \ No newline at end of file diff --git a/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.pod-owner.yaml b/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.pod-owner.yaml deleted file mode 100644 index 3dd118b..0000000 --- a/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-k8s.rules.pod-owner.yaml +++ /dev/null @@ -1,67 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.pod_owner.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: prometheus-community-kube-k8s.rules.pod-owner - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -spec: - groups: - - name: k8s.rules.pod_owner - rules: - - expr: |- - max by (cluster, namespace, workload, pod) ( - label_replace( - label_replace( - kube_pod_owner{job="kube-state-metrics", owner_kind="ReplicaSet"}, - "replicaset", "$1", "owner_name", "(.*)" - ) * on (replicaset, namespace) group_left(owner_name) topk by (replicaset, namespace) ( - 1, max by (replicaset, namespace, owner_name) ( - kube_replicaset_owner{job="kube-state-metrics"} - ) - ), - "workload", "$1", "owner_name", "(.*)" - ) - ) - labels: - workload_type: deployment - record: namespace_workload_pod:kube_pod_owner:relabel - - expr: |- - max by (cluster, namespace, workload, pod) ( - label_replace( - kube_pod_owner{job="kube-state-metrics", owner_kind="DaemonSet"}, - "workload", "$1", "owner_name", "(.*)" - ) - ) - labels: - workload_type: daemonset - record: namespace_workload_pod:kube_pod_owner:relabel - - expr: |- - max by (cluster, namespace, workload, pod) ( - label_replace( - kube_pod_owner{job="kube-state-metrics", owner_kind="StatefulSet"}, - "workload", "$1", "owner_name", "(.*)" - ) - ) - labels: - workload_type: statefulset - record: namespace_workload_pod:kube_pod_owner:relabel - - expr: |- - max by (cluster, namespace, workload, pod) ( - label_replace( - kube_pod_owner{job="kube-state-metrics", owner_kind="Job"}, - "workload", "$1", "owner_name", "(.*)" - ) - ) - labels: - workload_type: job - record: namespace_workload_pod:kube_pod_owner:relabel \ No newline at end of file diff --git a/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-prometheus-general.rules.yaml b/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-prometheus-general.rules.yaml deleted file mode 100644 index 2d1e4fd..0000000 --- a/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-prometheus-general.rules.yaml +++ /dev/null @@ -1,24 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kube-prometheus-general.rules.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: prometheus-community-kube-kube-prometheus-general.rules - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -spec: - groups: - - name: kube-prometheus-general.rules - rules: - - expr: count without(instance, pod, node) (up == 1) - record: count:up1 - - expr: count without(instance, pod, node) (up == 0) - record: count:up0 \ No newline at end of file diff --git a/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-prometheus-node-recording.rules.yaml b/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-prometheus-node-recording.rules.yaml deleted file mode 100644 index 4c24bf0..0000000 --- a/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-prometheus-node-recording.rules.yaml +++ /dev/null @@ -1,32 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kube-prometheus-node-recording.rules.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: prometheus-community-kube-kube-prometheus-node-recording.rules - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -spec: - groups: - - name: kube-prometheus-node-recording.rules - rules: - - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[3m])) BY (instance) - record: instance:node_cpu:rate:sum - - expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance) - record: instance:node_network_receive_bytes:rate:sum - - expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance) - record: instance:node_network_transmit_bytes:rate:sum - - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance) - record: instance:node_cpu:ratio - - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) - record: cluster:node_cpu:sum_rate5m - - expr: cluster:node_cpu:sum_rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu)) - record: cluster:node_cpu:ratio \ No newline at end of file diff --git a/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-apps.yaml b/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-apps.yaml deleted file mode 100644 index 84cc680..0000000 --- a/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-apps.yaml +++ /dev/null @@ -1,258 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-apps.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: prometheus-community-kube-kubernetes-apps - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -spec: - groups: - - name: kubernetes-apps - rules: - - alert: KubePodCrashLooping - annotations: - description: 'Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is in waiting state (reason: "CrashLoopBackOff").' - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodcrashlooping - summary: Pod is crash looping. - expr: max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff", job="kube-state-metrics", namespace=~".*"}[5m]) >= 1 - for: 15m - labels: - severity: warning - - alert: KubePodNotReady - annotations: - description: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state for longer than 15 minutes. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodnotready - summary: Pod has been in a non-ready state for more than 15 minutes. - expr: |- - sum by (namespace, pod, cluster) ( - max by (namespace, pod, cluster) ( - kube_pod_status_phase{job="kube-state-metrics", namespace=~".*", phase=~"Pending|Unknown|Failed"} - ) * on (namespace, pod, cluster) group_left(owner_kind) topk by (namespace, pod, cluster) ( - 1, max by (namespace, pod, owner_kind, cluster) (kube_pod_owner{owner_kind!="Job"}) - ) - ) > 0 - for: 15m - labels: - severity: warning - - alert: KubeDeploymentGenerationMismatch - annotations: - description: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment }} does not match, this indicates that the Deployment has failed but has not been rolled back. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentgenerationmismatch - summary: Deployment generation mismatch due to possible roll-back - expr: |- - kube_deployment_status_observed_generation{job="kube-state-metrics", namespace=~".*"} - != - kube_deployment_metadata_generation{job="kube-state-metrics", namespace=~".*"} - for: 15m - labels: - severity: warning - - alert: KubeDeploymentReplicasMismatch - annotations: - description: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not matched the expected number of replicas for longer than 15 minutes. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentreplicasmismatch - summary: Deployment has not matched the expected number of replicas. - expr: |- - ( - kube_deployment_spec_replicas{job="kube-state-metrics", namespace=~".*"} - > - kube_deployment_status_replicas_available{job="kube-state-metrics", namespace=~".*"} - ) and ( - changes(kube_deployment_status_replicas_updated{job="kube-state-metrics", namespace=~".*"}[10m]) - == - 0 - ) - for: 15m - labels: - severity: warning - - alert: KubeDeploymentRolloutStuck - annotations: - description: Rollout of deployment {{ $labels.namespace }}/{{ $labels.deployment }} is not progressing for longer than 15 minutes. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentrolloutstuck - summary: Deployment rollout is not progressing. - expr: |- - kube_deployment_status_condition{condition="Progressing", status="false",job="kube-state-metrics", namespace=~".*"} - != 0 - for: 15m - labels: - severity: warning - - alert: KubeStatefulSetReplicasMismatch - annotations: - description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not matched the expected number of replicas for longer than 15 minutes. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetreplicasmismatch - summary: StatefulSet has not matched the expected number of replicas. - expr: |- - ( - kube_statefulset_status_replicas_ready{job="kube-state-metrics", namespace=~".*"} - != - kube_statefulset_status_replicas{job="kube-state-metrics", namespace=~".*"} - ) and ( - changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~".*"}[10m]) - == - 0 - ) - for: 15m - labels: - severity: warning - - alert: KubeStatefulSetGenerationMismatch - annotations: - description: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset }} does not match, this indicates that the StatefulSet has failed but has not been rolled back. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetgenerationmismatch - summary: StatefulSet generation mismatch due to possible roll-back - expr: |- - kube_statefulset_status_observed_generation{job="kube-state-metrics", namespace=~".*"} - != - kube_statefulset_metadata_generation{job="kube-state-metrics", namespace=~".*"} - for: 15m - labels: - severity: warning - - alert: KubeStatefulSetUpdateNotRolledOut - annotations: - description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update has not been rolled out. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetupdatenotrolledout - summary: StatefulSet update has not been rolled out. - expr: |- - ( - max without (revision) ( - kube_statefulset_status_current_revision{job="kube-state-metrics", namespace=~".*"} - unless - kube_statefulset_status_update_revision{job="kube-state-metrics", namespace=~".*"} - ) - * - ( - kube_statefulset_replicas{job="kube-state-metrics", namespace=~".*"} - != - kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~".*"} - ) - ) and ( - changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~".*"}[5m]) - == - 0 - ) - for: 15m - labels: - severity: warning - - alert: KubeDaemonSetRolloutStuck - annotations: - description: DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not finished or progressed for at least 15 minutes. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetrolloutstuck - summary: DaemonSet rollout is stuck. - expr: |- - ( - ( - kube_daemonset_status_current_number_scheduled{job="kube-state-metrics", namespace=~".*"} - != - kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~".*"} - ) or ( - kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~".*"} - != - 0 - ) or ( - kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics", namespace=~".*"} - != - kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~".*"} - ) or ( - kube_daemonset_status_number_available{job="kube-state-metrics", namespace=~".*"} - != - kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~".*"} - ) - ) and ( - changes(kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics", namespace=~".*"}[5m]) - == - 0 - ) - for: 15m - labels: - severity: warning - - alert: KubeContainerWaiting - annotations: - description: pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container {{ $labels.container}} has been in waiting state for longer than 1 hour. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontainerwaiting - summary: Pod container waiting longer than 1 hour - expr: sum by (namespace, pod, container, cluster) (kube_pod_container_status_waiting_reason{job="kube-state-metrics", namespace=~".*"}) > 0 - for: 1h - labels: - severity: warning - - alert: KubeDaemonSetNotScheduled - annotations: - description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are not scheduled.' - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetnotscheduled - summary: DaemonSet pods are not scheduled. - expr: |- - kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~".*"} - - - kube_daemonset_status_current_number_scheduled{job="kube-state-metrics", namespace=~".*"} > 0 - for: 10m - labels: - severity: warning - - alert: KubeDaemonSetMisScheduled - annotations: - description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are running where they are not supposed to run.' - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetmisscheduled - summary: DaemonSet pods are misscheduled. - expr: kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~".*"} > 0 - for: 15m - labels: - severity: warning - - alert: KubeJobNotCompleted - annotations: - description: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than {{ "43200" | humanizeDuration }} to complete. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobnotcompleted - summary: Job did not complete in time - expr: |- - time() - max by (namespace, job_name, cluster) (kube_job_status_start_time{job="kube-state-metrics", namespace=~".*"} - and - kube_job_status_active{job="kube-state-metrics", namespace=~".*"} > 0) > 43200 - labels: - severity: warning - - alert: KubeJobFailed - annotations: - description: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete. Removing failed job after investigation should clear this alert. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobfailed - summary: Job failed to complete. - expr: kube_job_failed{job="kube-state-metrics", namespace=~".*"} > 0 - for: 15m - labels: - severity: warning - - alert: KubeHpaReplicasMismatch - annotations: - description: HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} has not matched the desired number of replicas for longer than 15 minutes. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpareplicasmismatch - summary: HPA has not matched desired number of replicas. - expr: |- - (kube_horizontalpodautoscaler_status_desired_replicas{job="kube-state-metrics", namespace=~".*"} - != - kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"}) - and - (kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"} - > - kube_horizontalpodautoscaler_spec_min_replicas{job="kube-state-metrics", namespace=~".*"}) - and - (kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"} - < - kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics", namespace=~".*"}) - and - changes(kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"}[15m]) == 0 - for: 15m - labels: - severity: warning - - alert: KubeHpaMaxedOut - annotations: - description: HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} has been running at max replicas for longer than 15 minutes. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpamaxedout - summary: HPA is running at max replicas - expr: |- - kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"} - == - kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics", namespace=~".*"} - for: 15m - labels: - severity: warning \ No newline at end of file diff --git a/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-resources.yaml b/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-resources.yaml deleted file mode 100644 index 25c5258..0000000 --- a/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-resources.yaml +++ /dev/null @@ -1,122 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-resources.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: prometheus-community-kube-kubernetes-resources - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -spec: - groups: - - name: kubernetes-resources - rules: - - alert: KubeCPUOvercommit - annotations: - description: Cluster {{ $labels.cluster }} has overcommitted CPU resource requests for Pods by {{ $value }} CPU shares and cannot tolerate node failure. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuovercommit - summary: Cluster has overcommitted CPU resource requests. - expr: |- - sum(namespace_cpu:kube_pod_container_resource_requests:sum{job="kube-state-metrics",}) by (cluster) - (sum(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by (cluster) - max(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by (cluster)) > 0 - and - (sum(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by (cluster) - max(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by (cluster)) > 0 - for: 10m - labels: - severity: warning - - alert: KubeMemoryOvercommit - annotations: - description: Cluster {{ $labels.cluster }} has overcommitted memory resource requests for Pods by {{ $value | humanize }} bytes and cannot tolerate node failure. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryovercommit - summary: Cluster has overcommitted memory resource requests. - expr: |- - sum(namespace_memory:kube_pod_container_resource_requests:sum{}) by (cluster) - (sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster) - max(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster)) > 0 - and - (sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster) - max(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster)) > 0 - for: 10m - labels: - severity: warning - - alert: KubeCPUQuotaOvercommit - annotations: - description: Cluster {{ $labels.cluster }} has overcommitted CPU resource requests for Namespaces. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuquotaovercommit - summary: Cluster has overcommitted CPU resource requests. - expr: |- - sum(min without(resource) (kube_resourcequota{job="kube-state-metrics", type="hard", resource=~"(cpu|requests.cpu)"})) by (cluster) - / - sum(kube_node_status_allocatable{resource="cpu", job="kube-state-metrics"}) by (cluster) - > 1.5 - for: 5m - labels: - severity: warning - - alert: KubeMemoryQuotaOvercommit - annotations: - description: Cluster {{ $labels.cluster }} has overcommitted memory resource requests for Namespaces. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryquotaovercommit - summary: Cluster has overcommitted memory resource requests. - expr: |- - sum(min without(resource) (kube_resourcequota{job="kube-state-metrics", type="hard", resource=~"(memory|requests.memory)"})) by (cluster) - / - sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster) - > 1.5 - for: 5m - labels: - severity: warning - - alert: KubeQuotaAlmostFull - annotations: - description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaalmostfull - summary: Namespace quota is going to be full. - expr: |- - kube_resourcequota{job="kube-state-metrics", type="used"} - / ignoring(instance, job, type) - (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0) - > 0.9 < 1 - for: 15m - labels: - severity: info - - alert: KubeQuotaFullyUsed - annotations: - description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotafullyused - summary: Namespace quota is fully used. - expr: |- - kube_resourcequota{job="kube-state-metrics", type="used"} - / ignoring(instance, job, type) - (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0) - == 1 - for: 15m - labels: - severity: info - - alert: KubeQuotaExceeded - annotations: - description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaexceeded - summary: Namespace quota has exceeded the limits. - expr: |- - kube_resourcequota{job="kube-state-metrics", type="used"} - / ignoring(instance, job, type) - (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0) - > 1 - for: 15m - labels: - severity: warning - - alert: CPUThrottlingHigh - annotations: - description: '{{ $value | humanizePercentage }} throttling of CPU in namespace {{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod }}.' - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/cputhrottlinghigh - summary: Processes experience elevated CPU throttling. - expr: |- - sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (cluster, container, pod, namespace) - / - sum(increase(container_cpu_cfs_periods_total{}[5m])) by (cluster, container, pod, namespace) - > ( 25 / 100 ) - for: 15m - labels: - severity: info \ No newline at end of file diff --git a/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-storage.yaml b/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-storage.yaml deleted file mode 100644 index c9da142..0000000 --- a/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-storage.yaml +++ /dev/null @@ -1,113 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-storage.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: prometheus-community-kube-kubernetes-storage - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -spec: - groups: - - name: kubernetes-storage - rules: - - alert: KubePersistentVolumeFillingUp - annotations: - description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.cluster }} is only {{ $value | humanizePercentage }} free. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup - summary: PersistentVolume is filling up. - expr: |- - ( - kubelet_volume_stats_available_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"} - / - kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"} - ) < 0.03 - and - kubelet_volume_stats_used_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0 - unless on (cluster, namespace, persistentvolumeclaim) - kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 - unless on (cluster, namespace, persistentvolumeclaim) - kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 - for: 1m - labels: - severity: critical - - alert: KubePersistentVolumeFillingUp - annotations: - description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.cluster }} is expected to fill up within four days. Currently {{ $value | humanizePercentage }} is available. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup - summary: PersistentVolume is filling up. - expr: |- - ( - kubelet_volume_stats_available_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"} - / - kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"} - ) < 0.15 - and - kubelet_volume_stats_used_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0 - and - predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 - unless on (cluster, namespace, persistentvolumeclaim) - kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 - unless on (cluster, namespace, persistentvolumeclaim) - kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 - for: 1h - labels: - severity: warning - - alert: KubePersistentVolumeInodesFillingUp - annotations: - description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.cluster }} only has {{ $value | humanizePercentage }} free inodes. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeinodesfillingup - summary: PersistentVolumeInodes are filling up. - expr: |- - ( - kubelet_volume_stats_inodes_free{job="kubelet", namespace=~".*", metrics_path="/metrics"} - / - kubelet_volume_stats_inodes{job="kubelet", namespace=~".*", metrics_path="/metrics"} - ) < 0.03 - and - kubelet_volume_stats_inodes_used{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0 - unless on (cluster, namespace, persistentvolumeclaim) - kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 - unless on (cluster, namespace, persistentvolumeclaim) - kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 - for: 1m - labels: - severity: critical - - alert: KubePersistentVolumeInodesFillingUp - annotations: - description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.cluster }} is expected to run out of inodes within four days. Currently {{ $value | humanizePercentage }} of its inodes are free. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeinodesfillingup - summary: PersistentVolumeInodes are filling up. - expr: |- - ( - kubelet_volume_stats_inodes_free{job="kubelet", namespace=~".*", metrics_path="/metrics"} - / - kubelet_volume_stats_inodes{job="kubelet", namespace=~".*", metrics_path="/metrics"} - ) < 0.15 - and - kubelet_volume_stats_inodes_used{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0 - and - predict_linear(kubelet_volume_stats_inodes_free{job="kubelet", namespace=~".*", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 - unless on (cluster, namespace, persistentvolumeclaim) - kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 - unless on (cluster, namespace, persistentvolumeclaim) - kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 - for: 1h - labels: - severity: warning - - alert: KubePersistentVolumeErrors - annotations: - description: The persistent volume {{ $labels.persistentvolume }} on Cluster {{ $labels.cluster }} has status {{ $labels.phase }}. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeerrors - summary: PersistentVolume is having issues with provisioning. - expr: kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} > 0 - for: 5m - labels: - severity: critical \ No newline at end of file diff --git a/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-apiserver.yaml b/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-apiserver.yaml deleted file mode 100644 index 9c46a59..0000000 --- a/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-apiserver.yaml +++ /dev/null @@ -1,64 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-apiserver.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: prometheus-community-kube-kubernetes-system-apiserver - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -spec: - groups: - - name: kubernetes-system-apiserver - rules: - - alert: KubeClientCertificateExpiration - annotations: - description: A client certificate used to authenticate to kubernetes apiserver is expiring in less than 7.0 days. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration - summary: Client certificate is about to expire. - expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on (job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800 - for: 5m - labels: - severity: warning - - alert: KubeClientCertificateExpiration - annotations: - description: A client certificate used to authenticate to kubernetes apiserver is expiring in less than 24.0 hours. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration - summary: Client certificate is about to expire. - expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on (job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400 - for: 5m - labels: - severity: critical - - alert: KubeAggregatedAPIErrors - annotations: - description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. It has appeared unavailable {{ $value | humanize }} times averaged over the past 10m. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapierrors - summary: Kubernetes aggregated API has reported errors. - expr: sum by (name, namespace, cluster)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[10m])) > 4 - labels: - severity: warning - - alert: KubeAggregatedAPIDown - annotations: - description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only {{ $value | humanize }}% available over the last 10m. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapidown - summary: Kubernetes aggregated API is down. - expr: (1 - max by (name, namespace, cluster)(avg_over_time(aggregator_unavailable_apiservice{job="apiserver"}[10m]))) * 100 < 85 - for: 5m - labels: - severity: warning - - alert: KubeAPITerminatedRequests - annotations: - description: The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapiterminatedrequests - summary: The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests. - expr: sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m])) / ( sum(rate(apiserver_request_total{job="apiserver"}[10m])) + sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m])) ) > 0.20 - for: 5m - labels: - severity: warning \ No newline at end of file diff --git a/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-kubelet.yaml b/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-kubelet.yaml deleted file mode 100644 index 56174cd..0000000 --- a/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-kubelet.yaml +++ /dev/null @@ -1,140 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-kubelet.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: prometheus-community-kube-kubernetes-system-kubelet - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -spec: - groups: - - name: kubernetes-system-kubelet - rules: - - alert: KubeNodeNotReady - annotations: - description: '{{ $labels.node }} has been unready for more than 15 minutes.' - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodenotready - summary: Node is not ready. - expr: kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0 - for: 15m - labels: - severity: warning - - alert: KubeNodeUnreachable - annotations: - description: '{{ $labels.node }} is unreachable and some workloads may be rescheduled.' - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodeunreachable - summary: Node is unreachable. - expr: (kube_node_spec_taint{job="kube-state-metrics",key="node.kubernetes.io/unreachable",effect="NoSchedule"} unless ignoring(key,value) kube_node_spec_taint{job="kube-state-metrics",key=~"ToBeDeletedByClusterAutoscaler|cloud.google.com/impending-node-termination|aws-node-termination-handler/spot-itn"}) == 1 - for: 15m - labels: - severity: warning - - alert: KubeletTooManyPods - annotations: - description: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage }} of its Pod capacity. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubelettoomanypods - summary: Kubelet is running at capacity. - expr: |- - count by (cluster, node) ( - (kube_pod_status_phase{job="kube-state-metrics",phase="Running"} == 1) * on (instance,pod,namespace,cluster) group_left(node) topk by (instance,pod,namespace,cluster) (1, kube_pod_info{job="kube-state-metrics"}) - ) - / - max by (cluster, node) ( - kube_node_status_capacity{job="kube-state-metrics",resource="pods"} != 1 - ) > 0.95 - for: 15m - labels: - severity: info - - alert: KubeNodeReadinessFlapping - annotations: - description: The readiness status of node {{ $labels.node }} has changed {{ $value }} times in the last 15 minutes. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodereadinessflapping - summary: Node readiness status is flapping. - expr: sum(changes(kube_node_status_condition{job="kube-state-metrics",status="true",condition="Ready"}[15m])) by (cluster, node) > 2 - for: 15m - labels: - severity: warning - - alert: KubeletPlegDurationHigh - annotations: - description: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }}. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletplegdurationhigh - summary: Kubelet Pod Lifecycle Event Generator is taking too long to relist. - expr: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10 - for: 5m - labels: - severity: warning - - alert: KubeletPodStartUpLatencyHigh - annotations: - description: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }}. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletpodstartuplatencyhigh - summary: Kubelet Pod startup latency is too high. - expr: histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (cluster, instance, le)) * on (cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60 - for: 15m - labels: - severity: warning - - alert: KubeletClientCertificateExpiration - annotations: - description: Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificateexpiration - summary: Kubelet client certificate is about to expire. - expr: kubelet_certificate_manager_client_ttl_seconds < 604800 - labels: - severity: warning - - alert: KubeletClientCertificateExpiration - annotations: - description: Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificateexpiration - summary: Kubelet client certificate is about to expire. - expr: kubelet_certificate_manager_client_ttl_seconds < 86400 - labels: - severity: critical - - alert: KubeletServerCertificateExpiration - annotations: - description: Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificateexpiration - summary: Kubelet server certificate is about to expire. - expr: kubelet_certificate_manager_server_ttl_seconds < 604800 - labels: - severity: warning - - alert: KubeletServerCertificateExpiration - annotations: - description: Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificateexpiration - summary: Kubelet server certificate is about to expire. - expr: kubelet_certificate_manager_server_ttl_seconds < 86400 - labels: - severity: critical - - alert: KubeletClientCertificateRenewalErrors - annotations: - description: Kubelet on node {{ $labels.node }} has failed to renew its client certificate ({{ $value | humanize }} errors in the last 5 minutes). - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificaterenewalerrors - summary: Kubelet has failed to renew its client certificate. - expr: increase(kubelet_certificate_manager_client_expiration_renew_errors[5m]) > 0 - for: 15m - labels: - severity: warning - - alert: KubeletServerCertificateRenewalErrors - annotations: - description: Kubelet on node {{ $labels.node }} has failed to renew its server certificate ({{ $value | humanize }} errors in the last 5 minutes). - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificaterenewalerrors - summary: Kubelet has failed to renew its server certificate. - expr: increase(kubelet_server_expiration_renew_errors[5m]) > 0 - for: 15m - labels: - severity: warning - - alert: KubeletDown - annotations: - description: Kubelet has disappeared from Prometheus target discovery. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletdown - summary: Target disappeared from Prometheus target discovery. - expr: absent(up{job="kubelet", metrics_path="/metrics"} == 1) - for: 15m - labels: - severity: critical \ No newline at end of file diff --git a/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system.yaml b/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system.yaml deleted file mode 100644 index 3ee7bd3..0000000 --- a/monitor/alerts-core/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system.yaml +++ /dev/null @@ -1,42 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: prometheus-community-kube-kubernetes-system - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -spec: - groups: - - name: kubernetes-system - rules: - - alert: KubeVersionMismatch - annotations: - description: There are {{ $value }} different semantic versions of Kubernetes components running. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeversionmismatch - summary: Different semantic versions of Kubernetes components running. - expr: count by (cluster) (count by (git_version, cluster) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"git_version","$1","git_version","(v[0-9]*.[0-9]*).*"))) > 1 - for: 15m - labels: - severity: warning - - alert: KubeClientErrors - annotations: - description: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}' is experiencing {{ $value | humanizePercentage }} errors.' - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclienterrors - summary: Kubernetes API server client is experiencing errors. - expr: |- - (sum(rate(rest_client_requests_total{job="apiserver",code=~"5.."}[5m])) by (cluster, instance, job, namespace) - / - sum(rate(rest_client_requests_total{job="apiserver"}[5m])) by (cluster, instance, job, namespace)) - > 0.01 - for: 15m - labels: - severity: warning \ No newline at end of file diff --git a/monitor/dashboards-cluster/datas.tf b/monitor/dashboards-cluster/datas.tf deleted file mode 100644 index 9797980..0000000 --- a/monitor/dashboards-cluster/datas.tf +++ /dev/null @@ -1,16 +0,0 @@ -locals { - common-labels = { - "vynil.solidite.fr/owner-name" = var.instance - "vynil.solidite.fr/owner-namespace" = var.namespace - "vynil.solidite.fr/owner-category" = var.category - "vynil.solidite.fr/owner-component" = var.component - "app.kubernetes.io/managed-by" = "vynil" - "app.kubernetes.io/instance" = var.instance - } -} - -data "kustomization_overlay" "data" { - common_labels = local.common-labels - namespace = var.namespace - resources = [for file in fileset(path.module, "*.yaml"): file if file != "index.yaml"] -} diff --git a/monitor/dashboards-cluster/index.yaml b/monitor/dashboards-cluster/index.yaml deleted file mode 100644 index 939e019..0000000 --- a/monitor/dashboards-cluster/index.yaml +++ /dev/null @@ -1,23 +0,0 @@ ---- -apiVersion: vinyl.solidite.fr/v1beta1 -kind: Component -category: monitor -metadata: - name: dashboards-cluster - description: null -options: - useless: - default: true - examples: - - true - type: boolean -dependencies: [] -providers: - kubernetes: true - authentik: null - kubectl: true - postgresql: null - restapi: null - http: null - gitea: null -tfaddtype: null diff --git a/monitor/dashboards-cluster/v1_ConfigMap_prometheus-community-kube-k8s-resources-cluster.yaml b/monitor/dashboards-cluster/v1_ConfigMap_prometheus-community-kube-k8s-resources-cluster.yaml deleted file mode 100644 index 44f86a8..0000000 --- a/monitor/dashboards-cluster/v1_ConfigMap_prometheus-community-kube-k8s-resources-cluster.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-cluster.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - namespace: vynil-monitor - name: prometheus-community-kube-k8s-resources-cluster - annotations: - {} - labels: - grafana_dashboard: "1" - app: kube-prometheus-stack-grafana - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -data: - k8s-resources-cluster.json: |- - {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[],"refresh":"10s","rows":[{"collapse":false,"height":"100px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":1,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"cluster:node_cpu:ratio_rate5m{cluster=\"$cluster\"}","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"CPU Utilisation","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":2,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\",cluster=\"$cluster\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"CPU Requests Commitment","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":3,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\",cluster=\"$cluster\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"CPU Limits Commitment","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":4,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"1 - sum(:node_memory_MemAvailable_bytes:sum{cluster=\"$cluster\"}) / sum(node_memory_MemTotal_bytes{job=\"node-exporter\",cluster=\"$cluster\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"Memory Utilisation","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":5,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\",cluster=\"$cluster\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"Memory Requests Commitment","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":6,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\",cluster=\"$cluster\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"Memory Limits Commitment","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Headlines","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":7,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace)","format":"time_series","legendFormat":"{{namespace}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Usage","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":8,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"Pods","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":0,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down to pods","linkUrl":"/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=${__data.fields.namespace}","pattern":"Value #A","thresholds":[],"type":"number","unit":"short"},{"alias":"Workloads","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":0,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down to workloads","linkUrl":"/d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=${__data.fields.namespace}","pattern":"Value #B","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Usage","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"CPU Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #F","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #G","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Namespace","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down to pods","linkUrl":"/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=${__value.text}","pattern":"namespace","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(kube_pod_owner{job=\"kube-state-metrics\", cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"E"},{"expr":"sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"F"},{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"G"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Quota","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU Quota","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":9,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace)","format":"time_series","legendFormat":"{{namespace}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Usage (w/o cache)","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":10,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"Pods","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":0,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down to pods","linkUrl":"/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=${__data.fields.namespace}","pattern":"Value #A","thresholds":[],"type":"number","unit":"short"},{"alias":"Workloads","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":0,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down to workloads","linkUrl":"/d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=${__data.fields.namespace}","pattern":"Value #B","thresholds":[],"type":"number","unit":"short"},{"alias":"Memory Usage","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Memory Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #F","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #G","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Namespace","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down to pods","linkUrl":"/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=${__value.text}","pattern":"namespace","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(kube_pod_owner{job=\"kube-state-metrics\", cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"E"},{"expr":"sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"F"},{"expr":"sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"G"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Requests by Namespace","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory Requests","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":11,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"Current Receive Bandwidth","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Current Transmit Bandwidth","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Rate of Received Packets","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Transmitted Packets","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Received Packets Dropped","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Transmitted Packets Dropped","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #F","thresholds":[],"type":"number","unit":"pps"},{"alias":"Namespace","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down to pods","linkUrl":"/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=${__value.text}","pattern":"namespace","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"E"},{"expr":"sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"F"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Current Network Usage","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Current Network Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":12,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","format":"time_series","legendFormat":"{{namespace}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Receive Bandwidth","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":13,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","format":"time_series","legendFormat":"{{namespace}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Transmit Bandwidth","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Bandwidth","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":14,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"avg(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","format":"time_series","legendFormat":"{{namespace}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Average Container Bandwidth by Namespace: Received","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":15,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"avg(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","format":"time_series","legendFormat":"{{namespace}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Average Container Bandwidth by Namespace: Transmitted","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Average Container Bandwidth by Namespace","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":16,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","format":"time_series","legendFormat":"{{namespace}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":17,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","format":"time_series","legendFormat":"{{namespace}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Rate of Packets","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":18,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","format":"time_series","legendFormat":"{{namespace}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets Dropped","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":19,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","format":"time_series","legendFormat":"{{namespace}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets Dropped","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Rate of Packets Dropped","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","decimals":null,"fill":10,"id":20,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"ceil(sum by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval])))","format":"time_series","legendFormat":"{{namespace}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"IOPS(Reads+Writes)","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":21,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))","format":"time_series","legendFormat":"{{namespace}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"ThroughPut(Read+Write)","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Storage IO","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":22,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"sort":{"col":4,"desc":true},"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"IOPS(Reads)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":3,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"iops"},{"alias":"IOPS(Writes)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":3,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"iops"},{"alias":"IOPS(Reads + Writes)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":3,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"iops"},{"alias":"Throughput(Read)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Throughput(Write)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Throughput(Read + Write)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #F","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Namespace","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down to pods","linkUrl":"/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=${__value.text}","pattern":"namespace","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum by(namespace) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum by(namespace) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"E"},{"expr":"sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"F"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Current Storage IO","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Storage IO - Distribution","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"","value":""},"datasource":"$datasource","hide":2,"includeAll":false,"label":null,"multi":false,"name":"cluster","options":[],"query":"label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Kubernetes / Compute Resources / Cluster","uid":"efa86fd1d0c121a26444b636a3f509a8","version":0} \ No newline at end of file diff --git a/monitor/dashboards-cluster/v1_ConfigMap_prometheus-community-kube-k8s-resources-multicluster.yaml b/monitor/dashboards-cluster/v1_ConfigMap_prometheus-community-kube-k8s-resources-multicluster.yaml deleted file mode 100644 index 6c41d99..0000000 --- a/monitor/dashboards-cluster/v1_ConfigMap_prometheus-community-kube-k8s-resources-multicluster.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-multicluster.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - namespace: vynil-monitor - name: prometheus-community-kube-k8s-resources-multicluster - annotations: - {} - labels: - grafana_dashboard: "1" - app: kube-prometheus-stack-grafana - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -data: - k8s-resources-multicluster.json: |- - {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[],"refresh":"10s","rows":[{"collapse":false,"height":"100px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":1,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"cluster:node_cpu:ratio_rate5m","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"CPU Utilisation","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":2,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", resource=\"cpu\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\", resource=\"cpu\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"CPU Requests Commitment","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":3,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", resource=\"cpu\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\", resource=\"cpu\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"CPU Limits Commitment","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":4,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"1 - sum(:node_memory_MemAvailable_bytes:sum) / sum(node_memory_MemTotal_bytes{job=\"node-exporter\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"Memory Utilisation","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":5,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", resource=\"memory\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\", resource=\"memory\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"Memory Requests Commitment","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":6,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", resource=\"memory\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\", resource=\"memory\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"Memory Limits Commitment","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Headlines","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":0,"id":7,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (cluster)","format":"time_series","legendFormat":"{{cluster}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Usage","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":8,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"CPU Usage","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"CPU Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Cluster","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"/d/efa86fd1d0c121a26444b636a3f509a8/k8s-resources-cluster?var-datasource=$datasource&var-cluster=${__value.text}","pattern":"cluster","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (cluster)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", resource=\"cpu\"}) by (cluster)","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (cluster) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", resource=\"cpu\"}) by (cluster)","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", resource=\"cpu\"}) by (cluster)","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (cluster) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", resource=\"cpu\"}) by (cluster)","format":"table","instant":true,"legendFormat":"","refId":"E"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Quota","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU Quota","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":0,"id":9,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\"}) by (cluster)","format":"time_series","legendFormat":"{{cluster}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Usage (w/o cache)","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":10,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"Memory Usage","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Memory Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Cluster","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"/d/efa86fd1d0c121a26444b636a3f509a8/k8s-resources-cluster?var-datasource=$datasource&var-cluster=${__value.text}","pattern":"cluster","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\"}) by (cluster)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", resource=\"memory\"}) by (cluster)","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\"}) by (cluster) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", resource=\"memory\"}) by (cluster)","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", resource=\"memory\"}) by (cluster)","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\"}) by (cluster) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", resource=\"memory\"}) by (cluster)","format":"table","instant":true,"legendFormat":"","refId":"E"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Requests by Cluster","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory Requests","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Kubernetes / Compute Resources / Multi-Cluster","uid":"b59e6c9f2fcbe2e16d77fc492374cc4f","version":0} \ No newline at end of file diff --git a/monitor/dashboards-minimal/datas.tf b/monitor/dashboards-minimal/datas.tf deleted file mode 100644 index 9797980..0000000 --- a/monitor/dashboards-minimal/datas.tf +++ /dev/null @@ -1,16 +0,0 @@ -locals { - common-labels = { - "vynil.solidite.fr/owner-name" = var.instance - "vynil.solidite.fr/owner-namespace" = var.namespace - "vynil.solidite.fr/owner-category" = var.category - "vynil.solidite.fr/owner-component" = var.component - "app.kubernetes.io/managed-by" = "vynil" - "app.kubernetes.io/instance" = var.instance - } -} - -data "kustomization_overlay" "data" { - common_labels = local.common-labels - namespace = var.namespace - resources = [for file in fileset(path.module, "*.yaml"): file if file != "index.yaml"] -} diff --git a/monitor/dashboards-minimal/index.yaml b/monitor/dashboards-minimal/index.yaml deleted file mode 100644 index 3b8efb5..0000000 --- a/monitor/dashboards-minimal/index.yaml +++ /dev/null @@ -1,23 +0,0 @@ ---- -apiVersion: vinyl.solidite.fr/v1beta1 -kind: Component -category: monitor -metadata: - name: dashboards-minimal - description: null -options: - useless: - default: true - examples: - - true - type: boolean -dependencies: [] -providers: - kubernetes: true - authentik: null - kubectl: true - postgresql: null - restapi: null - http: null - gitea: null -tfaddtype: null diff --git a/monitor/dashboards-minimal/v1_ConfigMap_prometheus-community-kube-cluster-total.yaml b/monitor/dashboards-minimal/v1_ConfigMap_prometheus-community-kube-cluster-total.yaml deleted file mode 100644 index e812711..0000000 --- a/monitor/dashboards-minimal/v1_ConfigMap_prometheus-community-kube-cluster-total.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/cluster-total.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - namespace: vynil-monitor - name: prometheus-community-kube-cluster-total - annotations: - {} - labels: - grafana_dashboard: "1" - app: kube-prometheus-stack-grafana - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -data: - cluster-total.json: |- - {"__inputs":[],"__requires":[],"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"id":null,"links":[],"panels":[{"collapse":false,"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":2,"panels":[],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Current Bandwidth","titleSize":"h6","type":"row"},{"aliasColors":{},"bars":true,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":0,"y":1},"id":3,"legend":{"alignAsTable":true,"avg":false,"current":true,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":false,"linewidth":1,"links":[],"minSpan":24,"nullPointMode":"null","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":false,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"time_series","intervalFactor":1,"legendFormat":"{{namespace}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Current Rate of Bytes Received","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"series","name":null,"show":false,"values":["current"]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":true,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":12,"y":1},"id":4,"legend":{"alignAsTable":true,"avg":false,"current":true,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":false,"linewidth":1,"links":[],"minSpan":24,"nullPointMode":"null","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":false,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"time_series","intervalFactor":1,"legendFormat":"{{namespace}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Current Rate of Bytes Transmitted","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"series","name":null,"show":false,"values":["current"]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"columns":[{"text":"Time","value":"Time"},{"text":"Value #A","value":"Value #A"},{"text":"Value #B","value":"Value #B"},{"text":"Value #C","value":"Value #C"},{"text":"Value #D","value":"Value #D"},{"text":"Value #E","value":"Value #E"},{"text":"Value #F","value":"Value #F"},{"text":"Value #G","value":"Value #G"},{"text":"Value #H","value":"Value #H"},{"text":"namespace","value":"namespace"}],"datasource":"$datasource","fill":1,"fontSize":"90%","gridPos":{"h":9,"w":24,"x":0,"y":10},"id":5,"lines":true,"linewidth":1,"links":[],"minSpan":24,"nullPointMode":"null as zero","renderer":"flot","scroll":true,"showHeader":true,"sort":{"col":0,"desc":false},"spaceLength":10,"span":24,"styles":[{"alias":"Time","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Time","thresholds":[],"type":"hidden","unit":"short"},{"alias":"Current Bandwidth Received","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Current Bandwidth Transmitted","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Average Bandwidth Received","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Average Bandwidth Transmitted","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Rate of Received Packets","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Transmitted Packets","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #F","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Received Packets Dropped","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #G","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Transmitted Packets Dropped","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #H","thresholds":[],"type":"number","unit":"pps"},{"alias":"Namespace","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTooltip":"Drill down","linkUrl":"d/8b7a8b326d7a6f1f04244066368c67af/kubernetes-networking-namespace-pods?orgId=1&refresh=30s&var-namespace=${__value.text}","pattern":"namespace","thresholds":[],"type":"number","unit":"short"}],"targets":[{"expr":"sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"A","step":10},{"expr":"sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"B","step":10},{"expr":"sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"C","step":10},{"expr":"sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"D","step":10},{"expr":"sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"E","step":10},{"expr":"sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"F","step":10},{"expr":"sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"G","step":10},{"expr":"sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"H","step":10}],"timeFrom":null,"timeShift":null,"title":"Current Status","type":"table"},{"collapse":true,"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":10},"id":6,"panels":[{"aliasColors":{},"bars":true,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":0,"y":11},"id":7,"legend":{"alignAsTable":true,"avg":false,"current":true,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":false,"linewidth":1,"links":[],"minSpan":24,"nullPointMode":"null","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":false,"steppedLine":false,"targets":[{"expr":"sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"time_series","intervalFactor":1,"legendFormat":"{{namespace}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Average Rate of Bytes Received","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"series","name":null,"show":false,"values":["current"]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":true,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":12,"y":11},"id":8,"legend":{"alignAsTable":true,"avg":false,"current":true,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":false,"linewidth":1,"links":[],"minSpan":24,"nullPointMode":"null","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":false,"steppedLine":false,"targets":[{"expr":"sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"time_series","intervalFactor":1,"legendFormat":"{{namespace}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Average Rate of Bytes Transmitted","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"series","name":null,"show":false,"values":["current"]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Average Bandwidth","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":11},"id":9,"panels":[],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Bandwidth History","titleSize":"h6","type":"row"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":24,"x":0,"y":12},"id":10,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":true,"hideZero":true,"max":true,"min":true,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"minSpan":24,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"time_series","intervalFactor":1,"legendFormat":"{{namespace}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Receive Bandwidth","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":24,"x":0,"y":21},"id":11,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":true,"hideZero":true,"max":true,"min":true,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"minSpan":24,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"time_series","intervalFactor":1,"legendFormat":"{{namespace}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Transmit Bandwidth","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"collapse":true,"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":30},"id":12,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":24,"x":0,"y":31},"id":13,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":true,"hideZero":true,"max":true,"min":true,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"minSpan":24,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"time_series","intervalFactor":1,"legendFormat":"{{namespace}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":24,"x":0,"y":40},"id":14,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":true,"hideZero":true,"max":true,"min":true,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"minSpan":24,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"time_series","intervalFactor":1,"legendFormat":"{{namespace}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Packets","titleSize":"h6","type":"row"},{"collapse":true,"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":31},"id":15,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":24,"x":0,"y":50},"id":16,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":true,"hideZero":true,"max":true,"min":true,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"minSpan":24,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"time_series","intervalFactor":1,"legendFormat":"{{namespace}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets Dropped","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":24,"x":0,"y":59},"id":17,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":true,"hideZero":true,"max":true,"min":true,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"minSpan":24,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))","format":"time_series","intervalFactor":1,"legendFormat":"{{namespace}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets Dropped","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":24,"x":0,"y":59},"id":18,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":true,"hideZero":true,"max":true,"min":true,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":2,"links":[{"targetBlank":true,"title":"What is TCP Retransmit?","url":"https://accedian.com/enterprises/blog/network-packet-loss-retransmissions-and-duplicate-acknowledgements/"}],"minSpan":24,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(rate(node_netstat_Tcp_RetransSegs{cluster=\"$cluster\"}[$interval:$resolution]) / rate(node_netstat_Tcp_OutSegs{cluster=\"$cluster\"}[$interval:$resolution])) by (instance))","format":"time_series","intervalFactor":1,"legendFormat":"{{instance}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of TCP Retransmits out of all sent segments","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"percentunit","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":24,"x":0,"y":59},"id":19,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":true,"hideZero":true,"max":true,"min":true,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":2,"links":[{"targetBlank":true,"title":"Why monitor SYN retransmits?","url":"https://github.com/prometheus/node_exporter/issues/1023#issuecomment-408128365"}],"minSpan":24,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(rate(node_netstat_TcpExt_TCPSynRetrans{cluster=\"$cluster\"}[$interval:$resolution]) / rate(node_netstat_Tcp_RetransSegs{cluster=\"$cluster\"}[$interval:$resolution])) by (instance))","format":"time_series","intervalFactor":1,"legendFormat":"{{instance}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of TCP SYN Retransmits out of all retransmits","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"percentunit","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Errors","titleSize":"h6","type":"row"}],"refresh":"10s","rows":[],"schemaVersion":18,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"allValue":null,"auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"5m","value":"5m"},"datasource":"$datasource","hide":0,"includeAll":false,"label":null,"multi":false,"name":"resolution","options":[{"selected":false,"text":"30s","value":"30s"},{"selected":true,"text":"5m","value":"5m"},{"selected":false,"text":"1h","value":"1h"}],"query":"30s,5m,1h","refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"interval","useTags":false},{"allValue":null,"auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"5m","value":"5m"},"datasource":"$datasource","hide":2,"includeAll":false,"label":null,"multi":false,"name":"interval","options":[{"selected":true,"text":"4h","value":"4h"}],"query":"4h","refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"interval","useTags":false},{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{},"datasource":"$datasource","hide":2,"includeAll":false,"label":null,"multi":false,"name":"cluster","options":[],"query":"label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)","refresh":2,"regex":"","sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Kubernetes / Networking / Cluster","uid":"ff635a025bcfea7bc3dd4f508990a3e9","version":0} \ No newline at end of file diff --git a/monitor/dashboards-minimal/v1_ConfigMap_prometheus-community-kube-grafana-overview.yaml b/monitor/dashboards-minimal/v1_ConfigMap_prometheus-community-kube-grafana-overview.yaml deleted file mode 100644 index 8695870..0000000 --- a/monitor/dashboards-minimal/v1_ConfigMap_prometheus-community-kube-grafana-overview.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/grafana-overview.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - namespace: vynil-monitor - name: prometheus-community-kube-grafana-overview - annotations: - {} - labels: - grafana_dashboard: "1" - app: kube-prometheus-stack-grafana - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -data: - grafana-overview.json: |- - {"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","target":{"limit":100,"matchAny":false,"tags":[],"type":"dashboard"},"type":"dashboard"}]},"editable":true,"gnetId":null,"graphTooltip":0,"id":3085,"iteration":1631554945276,"links":[],"panels":[{"datasource":"$datasource","fieldConfig":{"defaults":{"mappings":[],"noValue":"0","thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":5,"w":6,"x":0,"y":0},"id":6,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"text":{},"textMode":"auto"},"pluginVersion":"8.1.3","targets":[{"expr":"grafana_alerting_result_total{job=~\"$job\", instance=~\"$instance\", state=\"alerting\"}","instant":true,"interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Firing Alerts","type":"stat"},{"datasource":"$datasource","fieldConfig":{"defaults":{"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":5,"w":6,"x":6,"y":0},"id":8,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"text":{},"textMode":"auto"},"pluginVersion":"8.1.3","targets":[{"expr":"sum(grafana_stat_totals_dashboard{job=~\"$job\", instance=~\"$instance\"})","interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Dashboards","type":"stat"},{"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{"align":null,"displayMode":"auto"},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":5,"w":12,"x":12,"y":0},"id":10,"options":{"showHeader":true},"pluginVersion":"8.1.3","targets":[{"expr":"grafana_build_info{job=~\"$job\", instance=~\"$instance\"}","instant":true,"interval":"","legendFormat":"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Build Info","transformations":[{"id":"labelsToFields","options":{}},{"id":"organize","options":{"excludeByName":{"Time":true,"Value":true,"branch":true,"container":true,"goversion":true,"namespace":true,"pod":true,"revision":true},"indexByName":{"Time":7,"Value":11,"branch":4,"container":8,"edition":2,"goversion":6,"instance":1,"job":0,"namespace":9,"pod":10,"revision":5,"version":3},"renameByName":{}}}],"type":"table"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"links":[]},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":8,"w":12,"x":0,"y":5},"hiddenSeries":false,"id":2,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"alertThreshold":true},"percentage":false,"pluginVersion":"8.1.3","pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by (status_code) (irate(grafana_http_request_duration_seconds_count{job=~\"$job\", instance=~\"$instance\"}[1m])) ","interval":"","legendFormat":"{{status_code}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"RPS","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"$$hashKey":"object:157","format":"reqps","label":null,"logBase":1,"max":null,"min":null,"show":true},{"$$hashKey":"object:158","format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"links":[]},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":8,"w":12,"x":12,"y":5},"hiddenSeries":false,"id":4,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"alertThreshold":true},"percentage":false,"pluginVersion":"8.1.3","pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"exemplar":true,"expr":"histogram_quantile(0.99, sum(irate(grafana_http_request_duration_seconds_bucket{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])) by (le)) * 1","interval":"","legendFormat":"99th Percentile","refId":"A"},{"exemplar":true,"expr":"histogram_quantile(0.50, sum(irate(grafana_http_request_duration_seconds_bucket{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])) by (le)) * 1","interval":"","legendFormat":"50th Percentile","refId":"B"},{"exemplar":true,"expr":"sum(irate(grafana_http_request_duration_seconds_sum{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])) * 1 / sum(irate(grafana_http_request_duration_seconds_count{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval]))","interval":"","legendFormat":"Average","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Request Latency","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"$$hashKey":"object:210","format":"ms","label":null,"logBase":1,"max":null,"min":null,"show":true},{"$$hashKey":"object:211","format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"schemaVersion":30,"style":"dark","tags":[],"templating":{"list":[{"current":{"selected":true,"text":"dev-cortex","value":"dev-cortex"},"description":null,"error":null,"hide":0,"includeAll":false,"label":null,"multi":false,"name":"datasource","options":[],"query":"prometheus","queryValue":"","refresh":1,"regex":"","skipUrlSync":false,"type":"datasource"},{"allValue":".*","current":{"selected":false,"text":["default/grafana"],"value":["default/grafana"]},"datasource":"$datasource","definition":"label_values(grafana_build_info, job)","description":null,"error":null,"hide":0,"includeAll":true,"label":null,"multi":true,"name":"job","options":[],"query":{"query":"label_values(grafana_build_info, job)","refId":"Billing Admin-job-Variable-Query"},"refresh":1,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":"","tagsQuery":"","type":"query","useTags":false},{"allValue":".*","current":{"selected":false,"text":"All","value":"$__all"},"datasource":"$datasource","definition":"label_values(grafana_build_info, instance)","description":null,"error":null,"hide":0,"includeAll":true,"label":null,"multi":true,"name":"instance","options":[],"query":{"query":"label_values(grafana_build_info, instance)","refId":"Billing Admin-instance-Variable-Query"},"refresh":1,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":"","tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-6h","to":"now"},"timepicker":{"refresh_intervals":["10s","30s","1m","5m","15m","30m","1h","2h","1d"]},"timezone": "Europe/Paris","title":"Grafana Overview","uid":"6be0s85Mk","version":2} \ No newline at end of file diff --git a/monitor/dashboards-minimal/v1_ConfigMap_prometheus-community-kube-namespace-by-pod.yaml b/monitor/dashboards-minimal/v1_ConfigMap_prometheus-community-kube-namespace-by-pod.yaml deleted file mode 100644 index 457daff..0000000 --- a/monitor/dashboards-minimal/v1_ConfigMap_prometheus-community-kube-namespace-by-pod.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/namespace-by-pod.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - namespace: vynil-monitor - name: prometheus-community-kube-namespace-by-pod - annotations: - {} - labels: - grafana_dashboard: "1" - app: kube-prometheus-stack-grafana - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -data: - namespace-by-pod.json: |- - {"__inputs":[],"__requires":[],"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"id":null,"links":[],"panels":[{"collapse":false,"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":2,"panels":[],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Current Bandwidth","titleSize":"h6","type":"row"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"$datasource","decimals":0,"format":"time_series","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":9,"w":12,"x":0,"y":1},"height":9,"id":3,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"minSpan":12,"nullPointMode":"connected","nullText":null,"options":{"fieldOptions":{"calcs":["last"],"defaults":{"max":10000000000,"min":0,"title":"$namespace","unit":"Bps"},"mappings":[],"override":{},"thresholds":[{"color":"dark-green","index":0,"value":null},{"color":"dark-yellow","index":1,"value":5000000000},{"color":"dark-red","index":2,"value":7000000000}],"values":false}},"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":12,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution]))","format":"time_series","instant":null,"intervalFactor":1,"legendFormat":"","refId":"A"}],"thresholds":"","timeFrom":null,"timeShift":null,"title":"Current Rate of Bytes Received","type":"gauge","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"$datasource","decimals":0,"format":"time_series","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":9,"w":12,"x":12,"y":1},"height":9,"id":4,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"minSpan":12,"nullPointMode":"connected","nullText":null,"options":{"fieldOptions":{"calcs":["last"],"defaults":{"max":10000000000,"min":0,"title":"$namespace","unit":"Bps"},"mappings":[],"override":{},"thresholds":[{"color":"dark-green","index":0,"value":null},{"color":"dark-yellow","index":1,"value":5000000000},{"color":"dark-red","index":2,"value":7000000000}],"values":false}},"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":12,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution]))","format":"time_series","instant":null,"intervalFactor":1,"legendFormat":"","refId":"A"}],"thresholds":"","timeFrom":null,"timeShift":null,"title":"Current Rate of Bytes Transmitted","type":"gauge","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"columns":[{"text":"Time","value":"Time"},{"text":"Value #A","value":"Value #A"},{"text":"Value #B","value":"Value #B"},{"text":"Value #C","value":"Value #C"},{"text":"Value #D","value":"Value #D"},{"text":"Value #E","value":"Value #E"},{"text":"Value #F","value":"Value #F"},{"text":"pod","value":"pod"}],"datasource":"$datasource","fill":1,"fontSize":"100%","gridPos":{"h":9,"w":24,"x":0,"y":10},"id":5,"lines":true,"linewidth":1,"links":[],"minSpan":24,"nullPointMode":"null as zero","renderer":"flot","scroll":true,"showHeader":true,"sort":{"col":0,"desc":false},"spaceLength":10,"span":24,"styles":[{"alias":"Time","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Time","thresholds":[],"type":"hidden","unit":"short"},{"alias":"Bandwidth Received","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Bandwidth Transmitted","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Rate of Received Packets","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Transmitted Packets","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Received Packets Dropped","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Transmitted Packets Dropped","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #F","thresholds":[],"type":"number","unit":"pps"},{"alias":"Pod","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTooltip":"Drill down","linkUrl":"d/7a18067ce943a40ae25454675c19ff5c/kubernetes-networking-pod?orgId=1&refresh=30s&var-namespace=$namespace&var-pod=${__value.text}","pattern":"pod","thresholds":[],"type":"number","unit":"short"}],"targets":[{"expr":"sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"A","step":10},{"expr":"sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"B","step":10},{"expr":"sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"C","step":10},{"expr":"sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"D","step":10},{"expr":"sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"E","step":10},{"expr":"sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"F","step":10}],"timeFrom":null,"timeShift":null,"title":"Current Status","type":"table"},{"collapse":false,"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":19},"id":6,"panels":[],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Bandwidth","titleSize":"h6","type":"row"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":0,"y":20},"id":7,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Receive Bandwidth","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":12,"y":20},"id":8,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Transmit Bandwidth","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"collapse":true,"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":29},"id":9,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":10,"w":12,"x":0,"y":30},"id":10,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":10,"w":12,"x":12,"y":30},"id":11,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Packets","titleSize":"h6","type":"row"},{"collapse":true,"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":30},"id":12,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":10,"w":12,"x":0,"y":40},"id":13,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets Dropped","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":10,"w":12,"x":12,"y":40},"id":14,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets Dropped","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Errors","titleSize":"h6","type":"row"}],"refresh":"10s","rows":[],"schemaVersion":18,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{},"datasource":"$datasource","hide":2,"includeAll":false,"label":null,"multi":false,"name":"cluster","options":[],"query":"label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)","refresh":2,"regex":"","sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":".+","auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"kube-system","value":"kube-system"},"datasource":"$datasource","definition":"label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)","hide":0,"includeAll":true,"label":null,"multi":false,"name":"namespace","options":[],"query":"label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)","refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"5m","value":"5m"},"datasource":"$datasource","hide":0,"includeAll":false,"label":null,"multi":false,"name":"resolution","options":[{"selected":false,"text":"30s","value":"30s"},{"selected":true,"text":"5m","value":"5m"},{"selected":false,"text":"1h","value":"1h"}],"query":"30s,5m,1h","refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"interval","useTags":false},{"allValue":null,"auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"5m","value":"5m"},"datasource":"$datasource","hide":2,"includeAll":false,"label":null,"multi":false,"name":"interval","options":[{"selected":true,"text":"4h","value":"4h"}],"query":"4h","refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"interval","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Kubernetes / Networking / Namespace (Pods)","uid":"8b7a8b326d7a6f1f04244066368c67af","version":0} \ No newline at end of file diff --git a/monitor/dashboards-minimal/v1_ConfigMap_prometheus-community-kube-namespace-by-workload.yaml b/monitor/dashboards-minimal/v1_ConfigMap_prometheus-community-kube-namespace-by-workload.yaml deleted file mode 100644 index f75d072..0000000 --- a/monitor/dashboards-minimal/v1_ConfigMap_prometheus-community-kube-namespace-by-workload.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/namespace-by-workload.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - namespace: vynil-monitor - name: prometheus-community-kube-namespace-by-workload - annotations: - {} - labels: - grafana_dashboard: "1" - app: kube-prometheus-stack-grafana - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -data: - namespace-by-workload.json: |- - {"__inputs":[],"__requires":[],"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"id":null,"links":[],"panels":[{"collapse":false,"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":2,"panels":[],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Current Bandwidth","titleSize":"h6","type":"row"},{"aliasColors":{},"bars":true,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":0,"y":1},"id":3,"legend":{"alignAsTable":true,"avg":false,"current":true,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":false,"linewidth":1,"links":[],"minSpan":24,"nullPointMode":"null","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":false,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{ workload }}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Current Rate of Bytes Received","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"series","name":null,"show":false,"values":["current"]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":true,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":12,"y":1},"id":4,"legend":{"alignAsTable":true,"avg":false,"current":true,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":false,"linewidth":1,"links":[],"minSpan":24,"nullPointMode":"null","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":false,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{ workload }}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Current Rate of Bytes Transmitted","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"series","name":null,"show":false,"values":["current"]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"columns":[{"text":"Time","value":"Time"},{"text":"Value #A","value":"Value #A"},{"text":"Value #B","value":"Value #B"},{"text":"Value #C","value":"Value #C"},{"text":"Value #D","value":"Value #D"},{"text":"Value #E","value":"Value #E"},{"text":"Value #F","value":"Value #F"},{"text":"Value #G","value":"Value #G"},{"text":"Value #H","value":"Value #H"},{"text":"workload","value":"workload"}],"datasource":"$datasource","fill":1,"fontSize":"90%","gridPos":{"h":9,"w":24,"x":0,"y":10},"id":5,"lines":true,"linewidth":1,"links":[],"minSpan":24,"nullPointMode":"null as zero","renderer":"flot","scroll":true,"showHeader":true,"sort":{"col":0,"desc":false},"spaceLength":10,"span":24,"styles":[{"alias":"Time","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Time","thresholds":[],"type":"hidden","unit":"short"},{"alias":"Current Bandwidth Received","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Current Bandwidth Transmitted","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Average Bandwidth Received","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Average Bandwidth Transmitted","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Rate of Received Packets","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Transmitted Packets","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #F","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Received Packets Dropped","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #G","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Transmitted Packets Dropped","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #H","thresholds":[],"type":"number","unit":"pps"},{"alias":"Workload","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTooltip":"Drill down","linkUrl":"d/728bf77cc1166d2f3133bf25846876cc/kubernetes-networking-workload?orgId=1&refresh=30s&var-namespace=$namespace&var-type=$type&var-workload=${__value.text}","pattern":"workload","thresholds":[],"type":"number","unit":"short"}],"targets":[{"expr":"sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"A","step":10},{"expr":"sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"B","step":10},{"expr":"sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"C","step":10},{"expr":"sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"D","step":10},{"expr":"sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"E","step":10},{"expr":"sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"F","step":10},{"expr":"sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"G","step":10},{"expr":"sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true,"intervalFactor":2,"legendFormat":"","refId":"H","step":10}],"timeFrom":null,"timeShift":null,"title":"Current Status","type":"table"},{"collapse":true,"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":19},"id":6,"panels":[{"aliasColors":{},"bars":true,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":0,"y":20},"id":7,"legend":{"alignAsTable":true,"avg":false,"current":true,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":false,"linewidth":1,"links":[],"minSpan":24,"nullPointMode":"null","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":false,"steppedLine":false,"targets":[{"expr":"sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{ workload }}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Average Rate of Bytes Received","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"series","name":null,"show":false,"values":["current"]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":true,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":12,"y":20},"id":8,"legend":{"alignAsTable":true,"avg":false,"current":true,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":false,"linewidth":1,"links":[],"minSpan":24,"nullPointMode":"null","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":false,"steppedLine":false,"targets":[{"expr":"sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{ workload }}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Average Rate of Bytes Transmitted","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"series","name":null,"show":false,"values":["current"]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Average Bandwidth","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":29},"id":9,"panels":[],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Bandwidth HIstory","titleSize":"h6","type":"row"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":0,"y":38},"id":10,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{workload}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Receive Bandwidth","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":12,"y":38},"id":11,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{workload}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Transmit Bandwidth","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"collapse":true,"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":39},"id":12,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":0,"y":40},"id":13,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{workload}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":12,"y":40},"id":14,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{workload}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Packets","titleSize":"h6","type":"row"},{"collapse":true,"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":40},"id":15,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":0,"y":41},"id":16,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{workload}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets Dropped","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":12,"y":41},"id":17,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{workload}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets Dropped","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Errors","titleSize":"h6","type":"row"}],"refresh":"10s","rows":[],"schemaVersion":18,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{},"datasource":"$datasource","hide":2,"includeAll":false,"label":null,"multi":false,"name":"cluster","options":[],"query":"label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)","refresh":2,"regex":"","sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"kube-system","value":"kube-system"},"datasource":"$datasource","definition":"label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)","hide":0,"includeAll":false,"label":null,"multi":false,"name":"namespace","options":[],"query":"label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)","refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"","value":""},"datasource":"$datasource","definition":"label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\"}, workload_type)","hide":0,"includeAll":true,"label":null,"multi":false,"name":"type","options":[],"query":"label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\"}, workload_type)","refresh":2,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"5m","value":"5m"},"datasource":"$datasource","hide":0,"includeAll":false,"label":null,"multi":false,"name":"resolution","options":[{"selected":false,"text":"30s","value":"30s"},{"selected":true,"text":"5m","value":"5m"},{"selected":false,"text":"1h","value":"1h"}],"query":"30s,5m,1h","refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"interval","useTags":false},{"allValue":null,"auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"5m","value":"5m"},"datasource":"$datasource","hide":2,"includeAll":false,"label":null,"multi":false,"name":"interval","options":[{"selected":true,"text":"4h","value":"4h"}],"query":"4h","refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"interval","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Kubernetes / Networking / Namespace (Workload)","uid":"bbb2a765a623ae38130206c7d94a160f","version":0} \ No newline at end of file diff --git a/monitor/dashboards-minimal/v1_ConfigMap_prometheus-community-kube-persistentvolumesusage.yaml b/monitor/dashboards-minimal/v1_ConfigMap_prometheus-community-kube-persistentvolumesusage.yaml deleted file mode 100644 index f09f66d..0000000 --- a/monitor/dashboards-minimal/v1_ConfigMap_prometheus-community-kube-persistentvolumesusage.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/persistentvolumesusage.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - namespace: vynil-monitor - name: prometheus-community-kube-persistentvolumesusage - annotations: - {} - labels: - grafana_dashboard: "1" - app: kube-prometheus-stack-grafana - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -data: - persistentvolumesusage.json: |- - {"__inputs":[],"__requires":[],"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"id":null,"links":[],"refresh":"10s","rows":[{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":2,"interval":"1m","legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":9,"stack":true,"steppedLine":false,"targets":[{"expr":"(\n sum without(instance, node) (topk(1, (kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n -\n sum without(instance, node) (topk(1, (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n)\n","format":"time_series","intervalFactor":1,"legendFormat":"Used Space","refId":"A"},{"expr":"sum without(instance, node) (topk(1, (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n","format":"time_series","intervalFactor":1,"legendFormat":"Free Space","refId":"B"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Volume Space Usage","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 0.9)"],"datasource":"$datasource","format":"percent","gauge":{"maxValue":100,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{},"id":3,"interval":"1m","legend":{"alignAsTable":true,"rightSide":true},"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":3,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"max without(instance,node) (\n(\n topk(1, kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n topk(1, kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n/\ntopk(1, kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n* 100)\n","format":"time_series","intervalFactor":2,"legendFormat":"","refId":"A"}],"thresholds":"80, 90","title":"Volume Space Usage","tooltip":{"shared":false},"type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Dashboard Row","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":4,"interval":"1m","legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":9,"stack":true,"steppedLine":false,"targets":[{"expr":"sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n","format":"time_series","intervalFactor":1,"legendFormat":"Used inodes","refId":"A"},{"expr":"(\n sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n -\n sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n)\n","format":"time_series","intervalFactor":1,"legendFormat":" Free inodes","refId":"B"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Volume inodes Usage","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"none","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"none","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 0.9)"],"datasource":"$datasource","format":"percent","gauge":{"maxValue":100,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{},"id":5,"interval":"1m","legend":{"alignAsTable":true,"rightSide":true},"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":3,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"max without(instance,node) (\ntopk(1, kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n/\ntopk(1, kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n* 100)\n","format":"time_series","intervalFactor":2,"legendFormat":"","refId":"A"}],"thresholds":"80, 90","title":"Volume inodes Usage","tooltip":{"shared":false},"type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Dashboard Row","titleSize":"h6","type":"row"}],"schemaVersion":14,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{},"datasource":"$datasource","hide":2,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}, cluster)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{},"datasource":"$datasource","hide":0,"includeAll":false,"label":"Namespace","multi":false,"name":"namespace","options":[],"query":"label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"}, namespace)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{},"datasource":"$datasource","hide":0,"includeAll":false,"label":"PersistentVolumeClaim","multi":false,"name":"volume","options":[],"query":"label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\"}, persistentvolumeclaim)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-7d","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Kubernetes / Persistent Volumes","uid":"919b92a8e8041bd567af9edab12c840c","version":0} \ No newline at end of file diff --git a/monitor/dashboards-minimal/v1_ConfigMap_prometheus-community-kube-pod-total.yaml b/monitor/dashboards-minimal/v1_ConfigMap_prometheus-community-kube-pod-total.yaml deleted file mode 100644 index 1fbc367..0000000 --- a/monitor/dashboards-minimal/v1_ConfigMap_prometheus-community-kube-pod-total.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/pod-total.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - namespace: vynil-monitor - name: prometheus-community-kube-pod-total - annotations: - {} - labels: - grafana_dashboard: "1" - app: kube-prometheus-stack-grafana - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -data: - pod-total.json: |- - {"__inputs":[],"__requires":[],"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"id":null,"links":[],"panels":[{"collapse":false,"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":2,"panels":[],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Current Bandwidth","titleSize":"h6","type":"row"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"$datasource","decimals":0,"format":"time_series","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":9,"w":12,"x":0,"y":1},"height":9,"id":3,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"minSpan":12,"nullPointMode":"connected","nullText":null,"options":{"fieldOptions":{"calcs":["last"],"defaults":{"max":10000000000,"min":0,"title":"$namespace: $pod","unit":"Bps"},"mappings":[],"override":{},"thresholds":[{"color":"dark-green","index":0,"value":null},{"color":"dark-yellow","index":1,"value":5000000000},{"color":"dark-red","index":2,"value":7000000000}],"values":false}},"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":12,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution]))","format":"time_series","instant":null,"intervalFactor":1,"legendFormat":"","refId":"A"}],"thresholds":"","timeFrom":null,"timeShift":null,"title":"Current Rate of Bytes Received","type":"gauge","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"$datasource","decimals":0,"format":"time_series","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":9,"w":12,"x":12,"y":1},"height":9,"id":4,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"minSpan":12,"nullPointMode":"connected","nullText":null,"options":{"fieldOptions":{"calcs":["last"],"defaults":{"max":10000000000,"min":0,"title":"$namespace: $pod","unit":"Bps"},"mappings":[],"override":{},"thresholds":[{"color":"dark-green","index":0,"value":null},{"color":"dark-yellow","index":1,"value":5000000000},{"color":"dark-red","index":2,"value":7000000000}],"values":false}},"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":12,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution]))","format":"time_series","instant":null,"intervalFactor":1,"legendFormat":"","refId":"A"}],"thresholds":"","timeFrom":null,"timeShift":null,"title":"Current Rate of Bytes Transmitted","type":"gauge","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"collapse":false,"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":10},"id":5,"panels":[],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Bandwidth","titleSize":"h6","type":"row"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":0,"y":11},"id":6,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Receive Bandwidth","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":12,"y":11},"id":7,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Transmit Bandwidth","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"collapse":true,"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":20},"id":8,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":10,"w":12,"x":0,"y":21},"id":9,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":10,"w":12,"x":12,"y":21},"id":10,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Packets","titleSize":"h6","type":"row"},{"collapse":true,"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":21},"id":11,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":10,"w":12,"x":0,"y":32},"id":12,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets Dropped","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":10,"w":12,"x":12,"y":32},"id":13,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets Dropped","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Errors","titleSize":"h6","type":"row"}],"refresh":"10s","rows":[],"schemaVersion":18,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{},"datasource":"$datasource","hide":2,"includeAll":false,"label":null,"multi":false,"name":"cluster","options":[],"query":"label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)","refresh":2,"regex":"","sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":".+","auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"kube-system","value":"kube-system"},"datasource":"$datasource","definition":"label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)","hide":0,"includeAll":true,"label":null,"multi":false,"name":"namespace","options":[],"query":"label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)","refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":".+","auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"","value":""},"datasource":"$datasource","definition":"label_values(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}, pod)","hide":0,"includeAll":false,"label":null,"multi":false,"name":"pod","options":[],"query":"label_values(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}, pod)","refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"5m","value":"5m"},"datasource":"$datasource","hide":0,"includeAll":false,"label":null,"multi":false,"name":"resolution","options":[{"selected":false,"text":"30s","value":"30s"},{"selected":true,"text":"5m","value":"5m"},{"selected":false,"text":"1h","value":"1h"}],"query":"30s,5m,1h","refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"interval","useTags":false},{"allValue":null,"auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"5m","value":"5m"},"datasource":"$datasource","hide":2,"includeAll":false,"label":null,"multi":false,"name":"interval","options":[{"selected":true,"text":"4h","value":"4h"}],"query":"4h","refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"interval","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Kubernetes / Networking / Pod","uid":"7a18067ce943a40ae25454675c19ff5c","version":0} \ No newline at end of file diff --git a/monitor/dashboards-minimal/v1_ConfigMap_prometheus-community-kube-workload-total.yaml b/monitor/dashboards-minimal/v1_ConfigMap_prometheus-community-kube-workload-total.yaml deleted file mode 100644 index 7704520..0000000 --- a/monitor/dashboards-minimal/v1_ConfigMap_prometheus-community-kube-workload-total.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/workload-total.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - namespace: vynil-monitor - name: prometheus-community-kube-workload-total - annotations: - {} - labels: - grafana_dashboard: "1" - app: kube-prometheus-stack-grafana - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -data: - workload-total.json: |- - {"__inputs":[],"__requires":[],"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"id":null,"links":[],"panels":[{"collapse":false,"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":2,"panels":[],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Current Bandwidth","titleSize":"h6","type":"row"},{"aliasColors":{},"bars":true,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":0,"y":1},"id":3,"legend":{"alignAsTable":true,"avg":false,"current":true,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":false,"linewidth":1,"links":[],"minSpan":24,"nullPointMode":"null","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":false,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{ pod }}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Current Rate of Bytes Received","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"series","name":null,"show":false,"values":["current"]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":true,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":12,"y":1},"id":4,"legend":{"alignAsTable":true,"avg":false,"current":true,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":false,"linewidth":1,"links":[],"minSpan":24,"nullPointMode":"null","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":false,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{ pod }}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Current Rate of Bytes Transmitted","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"series","name":null,"show":false,"values":["current"]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"collapse":true,"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":10},"id":5,"panels":[{"aliasColors":{},"bars":true,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":0,"y":11},"id":6,"legend":{"alignAsTable":true,"avg":false,"current":true,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":false,"linewidth":1,"links":[],"minSpan":24,"nullPointMode":"null","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":false,"steppedLine":false,"targets":[{"expr":"sort_desc(avg(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{ pod }}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Average Rate of Bytes Received","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"series","name":null,"show":false,"values":["current"]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":true,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":12,"y":11},"id":7,"legend":{"alignAsTable":true,"avg":false,"current":true,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":false,"linewidth":1,"links":[],"minSpan":24,"nullPointMode":"null","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":24,"stack":false,"steppedLine":false,"targets":[{"expr":"sort_desc(avg(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{ pod }}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Average Rate of Bytes Transmitted","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"series","name":null,"show":false,"values":["current"]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Average Bandwidth","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":11},"id":8,"panels":[],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Bandwidth HIstory","titleSize":"h6","type":"row"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":0,"y":12},"id":9,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Receive Bandwidth","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":12,"y":12},"id":10,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Transmit Bandwidth","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"collapse":true,"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":21},"id":11,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":0,"y":22},"id":12,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":12,"y":22},"id":13,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Packets","titleSize":"h6","type":"row"},{"collapse":true,"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":22},"id":14,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":0,"y":23},"id":15,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets Dropped","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":2,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":12,"y":23},"id":16,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"minSpan":12,"nullPointMode":"connected","paceLength":10,"percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets Dropped","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Errors","titleSize":"h6","type":"row"}],"refresh":"10s","rows":[],"schemaVersion":18,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{},"datasource":"$datasource","hide":2,"includeAll":false,"label":null,"multi":false,"name":"cluster","options":[],"query":"label_values(kube_pod_info{job=\"kube-state-metrics\"}, cluster)","refresh":2,"regex":"","sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":".+","auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"kube-system","value":"kube-system"},"datasource":"$datasource","definition":"label_values(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\"}, namespace)","hide":0,"includeAll":true,"label":null,"multi":false,"name":"namespace","options":[],"query":"label_values(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\"}, namespace)","refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"","value":""},"datasource":"$datasource","definition":"label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\"}, workload)","hide":0,"includeAll":false,"label":null,"multi":false,"name":"workload","options":[],"query":"label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\"}, workload)","refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"","value":""},"datasource":"$datasource","definition":"label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\"}, workload_type)","hide":0,"includeAll":true,"label":null,"multi":false,"name":"type","options":[],"query":"label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\"}, workload_type)","refresh":2,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"5m","value":"5m"},"datasource":"$datasource","hide":0,"includeAll":false,"label":null,"multi":false,"name":"resolution","options":[{"selected":false,"text":"30s","value":"30s"},{"selected":true,"text":"5m","value":"5m"},{"selected":false,"text":"1h","value":"1h"}],"query":"30s,5m,1h","refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"interval","useTags":false},{"allValue":null,"auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"5m","value":"5m"},"datasource":"$datasource","hide":2,"includeAll":false,"label":null,"multi":false,"name":"interval","options":[{"selected":true,"text":"4h","value":"4h"}],"query":"4h","refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"interval","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Kubernetes / Networking / Workload","uid":"728bf77cc1166d2f3133bf25846876cc","version":0} \ No newline at end of file diff --git a/monitor/dashboards-namespace/datas.tf b/monitor/dashboards-namespace/datas.tf deleted file mode 100644 index 9797980..0000000 --- a/monitor/dashboards-namespace/datas.tf +++ /dev/null @@ -1,16 +0,0 @@ -locals { - common-labels = { - "vynil.solidite.fr/owner-name" = var.instance - "vynil.solidite.fr/owner-namespace" = var.namespace - "vynil.solidite.fr/owner-category" = var.category - "vynil.solidite.fr/owner-component" = var.component - "app.kubernetes.io/managed-by" = "vynil" - "app.kubernetes.io/instance" = var.instance - } -} - -data "kustomization_overlay" "data" { - common_labels = local.common-labels - namespace = var.namespace - resources = [for file in fileset(path.module, "*.yaml"): file if file != "index.yaml"] -} diff --git a/monitor/dashboards-namespace/index.yaml b/monitor/dashboards-namespace/index.yaml deleted file mode 100644 index 33e31c4..0000000 --- a/monitor/dashboards-namespace/index.yaml +++ /dev/null @@ -1,23 +0,0 @@ ---- -apiVersion: vinyl.solidite.fr/v1beta1 -kind: Component -category: monitor -metadata: - name: dashboards-namespace - description: null -options: - useless: - default: true - examples: - - true - type: boolean -dependencies: [] -providers: - kubernetes: true - authentik: null - kubectl: true - postgresql: null - restapi: null - http: null - gitea: null -tfaddtype: null diff --git a/monitor/dashboards-namespace/v1_ConfigMap_prometheus-community-kube-k8s-resources-namespace.yaml b/monitor/dashboards-namespace/v1_ConfigMap_prometheus-community-kube-k8s-resources-namespace.yaml deleted file mode 100644 index fa7d3df..0000000 --- a/monitor/dashboards-namespace/v1_ConfigMap_prometheus-community-kube-k8s-resources-namespace.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-namespace.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - namespace: vynil-monitor - name: prometheus-community-kube-k8s-resources-namespace - annotations: - {} - labels: - grafana_dashboard: "1" - app: kube-prometheus-stack-grafana - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -data: - k8s-resources-namespace.json: |- - {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[],"refresh":"10s","rows":[{"collapse":false,"height":"100px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":1,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":3,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"CPU Utilisation (from requests)","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":2,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":3,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"CPU Utilisation (from limits)","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":3,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":3,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"Memory Utilisation (from requests)","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":4,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":3,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"Memory Utilisation (from limits)","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Headlines","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":5,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"quota - requests","color":"#F2495C","dashes":true,"fill":0,"hiddenSeries":true,"hideTooltip":true,"legend":true,"linewidth":2,"stack":false},{"alias":"quota - limits","color":"#FF9830","dashes":true,"fill":0,"hiddenSeries":true,"hideTooltip":true,"legend":true,"linewidth":2,"stack":false}],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"time_series","legendFormat":"{{pod}}","legendLink":null},{"expr":"scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"})","format":"time_series","legendFormat":"quota - requests","legendLink":null},{"expr":"scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"})","format":"time_series","legendFormat":"quota - limits","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Usage","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":6,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"CPU Usage","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"CPU Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Pod","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__value.text}","pattern":"pod","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"E"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Quota","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU Quota","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":7,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"quota - requests","color":"#F2495C","dashes":true,"fill":0,"hiddenSeries":true,"hideTooltip":true,"legend":true,"linewidth":2,"stack":false},{"alias":"quota - limits","color":"#FF9830","dashes":true,"fill":0,"hiddenSeries":true,"hideTooltip":true,"legend":true,"linewidth":2,"stack":false}],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}) by (pod)","format":"time_series","legendFormat":"{{pod}}","legendLink":null},{"expr":"scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"})","format":"time_series","legendFormat":"quota - requests","legendLink":null},{"expr":"scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"})","format":"time_series","legendFormat":"quota - limits","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Usage (w/o cache)","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":8,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"Memory Usage","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Memory Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Memory Usage (RSS)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #F","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Usage (Cache)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #G","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Usage (Swap)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #H","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Pod","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__value.text}","pattern":"pod","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"E"},{"expr":"sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"F"},{"expr":"sum(container_memory_cache{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"G"},{"expr":"sum(container_memory_swap{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"H"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Quota","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory Quota","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":9,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"Current Receive Bandwidth","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Current Transmit Bandwidth","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Rate of Received Packets","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Transmitted Packets","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Received Packets Dropped","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Transmitted Packets Dropped","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #F","thresholds":[],"type":"number","unit":"pps"},{"alias":"Pod","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down to pods","linkUrl":"/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__value.text}","pattern":"pod","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"E"},{"expr":"sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"F"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Current Network Usage","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Current Network Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":10,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Receive Bandwidth","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":11,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Transmit Bandwidth","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Bandwidth","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":12,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":13,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Rate of Packets","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":14,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets Dropped","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":15,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets Dropped","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Rate of Packets Dropped","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","decimals":null,"fill":10,"id":16,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"IOPS(Reads+Writes)","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":17,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"ThroughPut(Read+Write)","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Storage IO","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":18,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"sort":{"col":4,"desc":true},"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"IOPS(Reads)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":3,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"iops"},{"alias":"IOPS(Writes)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":3,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"iops"},{"alias":"IOPS(Reads + Writes)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":3,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"iops"},{"alias":"Throughput(Read)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Throughput(Write)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Throughput(Read + Write)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #F","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Pod","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down to pods","linkUrl":"/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__value.text}","pattern":"pod","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum by(pod) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum by(pod) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum by(pod) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum by(pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum by(pod) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"E"},{"expr":"sum by(pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"F"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Current Storage IO","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Storage IO - Distribution","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"","value":""},"datasource":"$datasource","hide":2,"includeAll":false,"label":null,"multi":false,"name":"cluster","options":[],"query":"label_values(up{job=\"kube-state-metrics\"}, cluster)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"","value":""},"datasource":"$datasource","hide":0,"includeAll":false,"label":null,"multi":false,"name":"namespace","options":[],"query":"label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Kubernetes / Compute Resources / Namespace (Pods)","uid":"85a562078cdf77779eaa1add43ccec1e","version":0} \ No newline at end of file diff --git a/monitor/dashboards-namespace/v1_ConfigMap_prometheus-community-kube-k8s-resources-workloads-namespace.yaml b/monitor/dashboards-namespace/v1_ConfigMap_prometheus-community-kube-k8s-resources-workloads-namespace.yaml deleted file mode 100644 index 943f5ce..0000000 --- a/monitor/dashboards-namespace/v1_ConfigMap_prometheus-community-kube-k8s-resources-workloads-namespace.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-workloads-namespace.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - namespace: vynil-monitor - name: prometheus-community-kube-k8s-resources-workloads-namespace - annotations: - {} - labels: - grafana_dashboard: "1" - app: kube-prometheus-stack-grafana - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -data: - k8s-resources-workloads-namespace.json: |- - {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[],"refresh":"10s","rows":[{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":1,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"quota - requests","color":"#F2495C","dashes":true,"fill":0,"hiddenSeries":true,"hideTooltip":true,"legend":true,"linewidth":2,"stack":false},{"alias":"quota - limits","color":"#FF9830","dashes":true,"fill":0,"hiddenSeries":true,"hideTooltip":true,"legend":true,"linewidth":2,"stack":false}],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"time_series","legendFormat":"{{workload}} - {{workload_type}}","legendLink":null},{"expr":"scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"})","format":"time_series","legendFormat":"quota - requests","legendLink":null},{"expr":"scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"})","format":"time_series","legendFormat":"quota - limits","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Usage","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":2,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"Running Pods","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":0,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Usage","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"CPU Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #F","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Workload","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=${__value.text}&var-type=${__data.fields.workload_type}","pattern":"workload","thresholds":[],"type":"number","unit":"short"},{"alias":"Workload Type","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"workload_type","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"count(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload, workload_type)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true,"legendFormat":"","refId":"E"},{"expr":"sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true,"legendFormat":"","refId":"F"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Quota","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU Quota","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":3,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"quota - requests","color":"#F2495C","dashes":true,"fill":0,"hiddenSeries":true,"hideTooltip":true,"legend":true,"linewidth":2,"stack":false},{"alias":"quota - limits","color":"#FF9830","dashes":true,"fill":0,"hiddenSeries":true,"hideTooltip":true,"legend":true,"linewidth":2,"stack":false}],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(\n container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"time_series","legendFormat":"{{workload}} - {{workload_type}}","legendLink":null},{"expr":"scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"})","format":"time_series","legendFormat":"quota - requests","legendLink":null},{"expr":"scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"})","format":"time_series","legendFormat":"quota - limits","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Usage","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":4,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"Running Pods","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":0,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"short"},{"alias":"Memory Usage","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Memory Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #F","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Workload","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=${__value.text}&var-type=${__data.fields.workload_type}","pattern":"workload","thresholds":[],"type":"number","unit":"short"},{"alias":"Workload Type","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"workload_type","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"count(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload, workload_type)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(\n container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(\n container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true,"legendFormat":"","refId":"E"},{"expr":"sum(\n container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true,"legendFormat":"","refId":"F"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Quota","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory Quota","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":5,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"Current Receive Bandwidth","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Current Transmit Bandwidth","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Rate of Received Packets","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Transmitted Packets","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Received Packets Dropped","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Transmitted Packets Dropped","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #F","thresholds":[],"type":"number","unit":"pps"},{"alias":"Workload","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down to pods","linkUrl":"/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=${__value.text}&var-type=$type","pattern":"workload","thresholds":[],"type":"number","unit":"short"},{"alias":"Workload Type","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"workload_type","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"(sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true,"legendFormat":"","refId":"E"},{"expr":"(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true,"legendFormat":"","refId":"F"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Current Network Usage","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Current Network Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":6,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","legendFormat":"{{workload}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Receive Bandwidth","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":7,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","legendFormat":"{{workload}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Transmit Bandwidth","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Bandwidth","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":8,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(avg(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","legendFormat":"{{workload}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Average Container Bandwidth by Workload: Received","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":9,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(avg(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","legendFormat":"{{workload}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Average Container Bandwidth by Workload: Transmitted","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Average Container Bandwidth by Workload","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":10,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","legendFormat":"{{workload}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":11,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","legendFormat":"{{workload}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Rate of Packets","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":12,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","legendFormat":"{{workload}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets Dropped","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":13,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","format":"time_series","legendFormat":"{{workload}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets Dropped","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Rate of Packets Dropped","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"","value":""},"datasource":"$datasource","hide":2,"includeAll":false,"label":null,"multi":false,"name":"cluster","options":[],"query":"label_values(up{job=\"kube-state-metrics\"}, cluster)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"","value":""},"datasource":"$datasource","hide":0,"includeAll":false,"label":null,"multi":false,"name":"namespace","options":[],"query":"label_values(kube_pod_info{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"auto":false,"auto_count":30,"auto_min":"10s","current":{"text":"","value":""},"datasource":"$datasource","definition":"label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\"}, workload_type)","hide":0,"includeAll":true,"label":null,"multi":false,"name":"type","options":[],"query":"label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\"}, workload_type)","refresh":2,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Kubernetes / Compute Resources / Namespace (Workloads)","uid":"a87fb0d919ec0ea5f6543124e16c42a5","version":0} \ No newline at end of file diff --git a/monitor/dashboards-workload/datas.tf b/monitor/dashboards-workload/datas.tf deleted file mode 100644 index 9797980..0000000 --- a/monitor/dashboards-workload/datas.tf +++ /dev/null @@ -1,16 +0,0 @@ -locals { - common-labels = { - "vynil.solidite.fr/owner-name" = var.instance - "vynil.solidite.fr/owner-namespace" = var.namespace - "vynil.solidite.fr/owner-category" = var.category - "vynil.solidite.fr/owner-component" = var.component - "app.kubernetes.io/managed-by" = "vynil" - "app.kubernetes.io/instance" = var.instance - } -} - -data "kustomization_overlay" "data" { - common_labels = local.common-labels - namespace = var.namespace - resources = [for file in fileset(path.module, "*.yaml"): file if file != "index.yaml"] -} diff --git a/monitor/dashboards-workload/index.yaml b/monitor/dashboards-workload/index.yaml deleted file mode 100644 index f04e950..0000000 --- a/monitor/dashboards-workload/index.yaml +++ /dev/null @@ -1,23 +0,0 @@ ---- -apiVersion: vinyl.solidite.fr/v1beta1 -kind: Component -category: monitor -metadata: - name: dashboards-workload - description: null -options: - useless: - default: true - examples: - - true - type: boolean -dependencies: [] -providers: - kubernetes: true - authentik: null - kubectl: true - postgresql: null - restapi: null - http: null - gitea: null -tfaddtype: null diff --git a/monitor/dashboards-workload/v1_ConfigMap_prometheus-community-kube-k8s-resources-node.yaml b/monitor/dashboards-workload/v1_ConfigMap_prometheus-community-kube-k8s-resources-node.yaml deleted file mode 100644 index 2496ae8..0000000 --- a/monitor/dashboards-workload/v1_ConfigMap_prometheus-community-kube-k8s-resources-node.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-node.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - namespace: vynil-monitor - name: prometheus-community-kube-k8s-resources-node - annotations: - {} - labels: - grafana_dashboard: "1" - app: kube-prometheus-stack-grafana - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -data: - k8s-resources-node.json: |- - {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[],"refresh":"10s","rows":[{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":1,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"max capacity","color":"#F2495C","dashes":true,"fill":0,"hiddenSeries":true,"hideTooltip":true,"legend":true,"linewidth":2,"stack":false}],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(kube_node_status_capacity{cluster=\"$cluster\", node=~\"$node\", resource=\"cpu\"})","format":"time_series","legendFormat":"max capacity","legendLink":null},{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Usage","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":2,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"CPU Usage","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"CPU Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Pod","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"pod","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"E"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Quota","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU Quota","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":3,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"max capacity","color":"#F2495C","dashes":true,"fill":0,"hiddenSeries":true,"hideTooltip":true,"legend":true,"linewidth":2,"stack":false}],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(kube_node_status_capacity{cluster=\"$cluster\", node=~\"$node\", resource=\"memory\"})","format":"time_series","legendFormat":"max capacity","legendLink":null},{"expr":"sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\", container!=\"\"}) by (pod)","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Usage (w/o cache)","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":4,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"Memory Usage","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Memory Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Memory Usage (RSS)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #F","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Usage (Cache)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #G","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Usage (Swap)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #H","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Pod","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"pod","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"E"},{"expr":"sum(node_namespace_pod_container:container_memory_rss{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"F"},{"expr":"sum(node_namespace_pod_container:container_memory_cache{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"G"},{"expr":"sum(node_namespace_pod_container:container_memory_swap{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"H"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Quota","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory Quota","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"","value":""},"datasource":"$datasource","hide":2,"includeAll":false,"label":null,"multi":false,"name":"cluster","options":[],"query":"label_values(up{job=\"kube-state-metrics\"}, cluster)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"","value":""},"datasource":"$datasource","hide":0,"includeAll":false,"label":null,"multi":true,"name":"node","options":[],"query":"label_values(kube_node_info{cluster=\"$cluster\"}, node)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Kubernetes / Compute Resources / Node (Pods)","uid":"200ac8fdbfbb74b39aff88118e4d1c2c","version":0} \ No newline at end of file diff --git a/monitor/dashboards-workload/v1_ConfigMap_prometheus-community-kube-k8s-resources-pod.yaml b/monitor/dashboards-workload/v1_ConfigMap_prometheus-community-kube-k8s-resources-pod.yaml deleted file mode 100644 index d72fc05..0000000 --- a/monitor/dashboards-workload/v1_ConfigMap_prometheus-community-kube-k8s-resources-pod.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-pod.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - namespace: vynil-monitor - name: prometheus-community-kube-k8s-resources-pod - annotations: - {} - labels: - grafana_dashboard: "1" - app: kube-prometheus-stack-grafana - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -data: - k8s-resources-pod.json: |- - {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[],"refresh":"10s","rows":[{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":1,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"requests","color":"#F2495C","fill":0,"hideTooltip":true,"legend":true,"linewidth":2,"stack":false},{"alias":"limits","color":"#FF9830","fill":0,"hideTooltip":true,"legend":true,"linewidth":2,"stack":false}],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}) by (container)","format":"time_series","legendFormat":"{{container}}","legendLink":null},{"expr":"sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n","format":"time_series","legendFormat":"requests","legendLink":null},{"expr":"sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n","format":"time_series","legendFormat":"limits","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Usage","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":2,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":true,"max":true,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(increase(container_cpu_cfs_throttled_periods_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container) /sum(increase(container_cpu_cfs_periods_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container)","format":"time_series","legendFormat":"{{container}}","legendLink":null}],"thresholds":[{"colorMode":"critical","fill":true,"line":true,"op":"gt","value":0.25,"yaxis":"left"}],"timeFrom":null,"timeShift":null,"title":"CPU Throttling","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":1,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU Throttling","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":3,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"CPU Usage","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"CPU Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Container","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"container","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"E"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Quota","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU Quota","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":4,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"requests","color":"#F2495C","dashes":true,"fill":0,"hideTooltip":true,"legend":true,"linewidth":2,"stack":false},{"alias":"limits","color":"#FF9830","dashes":true,"fill":0,"hideTooltip":true,"legend":true,"linewidth":2,"stack":false}],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)","format":"time_series","legendFormat":"{{container}}","legendLink":null},{"expr":"sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n","format":"time_series","legendFormat":"requests","legendLink":null},{"expr":"sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n","format":"time_series","legendFormat":"limits","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Usage (WSS)","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":5,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"Memory Usage (WSS)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Memory Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Memory Usage (RSS)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #F","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Usage (Cache)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #G","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Usage (Swap)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #H","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Container","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"container","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"E"},{"expr":"sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"F"},{"expr":"sum(container_memory_cache{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"G"},{"expr":"sum(container_memory_swap{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"H"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Quota","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory Quota","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":6,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Receive Bandwidth","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":7,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Transmit Bandwidth","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Bandwidth","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":8,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":9,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Rate of Packets","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":10,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets Dropped","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":11,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets Dropped","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Rate of Packets Dropped","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","decimals":null,"fill":10,"id":12,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"ceil(sum by(pod) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))","format":"time_series","legendFormat":"Reads","legendLink":null},{"expr":"ceil(sum by(pod) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))","format":"time_series","legendFormat":"Writes","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"IOPS","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":13,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by(pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))","format":"time_series","legendFormat":"Reads","legendLink":null},{"expr":"sum by(pod) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))","format":"time_series","legendFormat":"Writes","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"ThroughPut","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Storage IO - Distribution(Pod - Read & Writes)","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","decimals":null,"fill":10,"id":14,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"ceil(sum by(container) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])))","format":"time_series","legendFormat":"{{container}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"IOPS(Reads+Writes)","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":15,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by(container) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","format":"time_series","legendFormat":"{{container}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"ThroughPut(Read+Write)","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Storage IO - Distribution(Containers)","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":16,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"sort":{"col":4,"desc":true},"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"IOPS(Reads)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":3,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"iops"},{"alias":"IOPS(Writes)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":3,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"iops"},{"alias":"IOPS(Reads + Writes)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":3,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"iops"},{"alias":"Throughput(Read)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Throughput(Write)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Throughput(Read + Write)","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #F","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Container","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"container","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum by(container) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum by(container) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\",device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum by(container) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum by(container) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum by(container) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"E"},{"expr":"sum by(container) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","format":"table","instant":true,"legendFormat":"","refId":"F"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Current Storage IO","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Storage IO - Distribution","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"","value":""},"datasource":"$datasource","hide":2,"includeAll":false,"label":null,"multi":false,"name":"cluster","options":[],"query":"label_values(up{job=\"kube-state-metrics\"}, cluster)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"","value":""},"datasource":"$datasource","hide":0,"includeAll":false,"label":null,"multi":false,"name":"namespace","options":[],"query":"label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"","value":""},"datasource":"$datasource","hide":0,"includeAll":false,"label":null,"multi":false,"name":"pod","options":[],"query":"label_values(kube_pod_info{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\"}, pod)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Kubernetes / Compute Resources / Pod","uid":"6581e46e4e5c7ba40a07646395ef7b23","version":0} \ No newline at end of file diff --git a/monitor/dashboards-workload/v1_ConfigMap_prometheus-community-kube-k8s-resources-workload.yaml b/monitor/dashboards-workload/v1_ConfigMap_prometheus-community-kube-k8s-resources-workload.yaml deleted file mode 100644 index 284f30a..0000000 --- a/monitor/dashboards-workload/v1_ConfigMap_prometheus-community-kube-k8s-resources-workload.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-workload.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - namespace: vynil-monitor - name: prometheus-community-kube-k8s-resources-workload - annotations: - {} - labels: - grafana_dashboard: "1" - app: kube-prometheus-stack-grafana - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -data: - k8s-resources-workload.json: |- - {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[],"refresh":"10s","rows":[{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":1,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Usage","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":2,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"CPU Usage","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"CPU Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Pod","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__value.text}","pattern":"pod","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","format":"table","instant":true,"legendFormat":"","refId":"E"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Quota","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU Quota","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":3,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Usage","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":4,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"Memory Usage","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Memory Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Pod","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__value.text}","pattern":"pod","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","format":"table","instant":true,"legendFormat":"","refId":"E"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Quota","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory Quota","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":5,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"Current Receive Bandwidth","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Current Transmit Bandwidth","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"Bps"},{"alias":"Rate of Received Packets","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Transmitted Packets","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Received Packets Dropped","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"pps"},{"alias":"Rate of Transmitted Packets Dropped","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #F","thresholds":[],"type":"number","unit":"pps"},{"alias":"Pod","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__value.text}","pattern":"pod","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"(sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"table","instant":true,"legendFormat":"","refId":"E"},{"expr":"(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"table","instant":true,"legendFormat":"","refId":"F"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Current Network Usage","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Current Network Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":6,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Receive Bandwidth","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":7,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Transmit Bandwidth","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Bandwidth","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":8,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(avg(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Average Container Bandwidth by Pod: Received","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":9,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(avg(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Average Container Bandwidth by Pod: Transmitted","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Average Container Bandwidth by Pod","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":10,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":11,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Rate of Packets","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":12,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Received Packets Dropped","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":13,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rate of Transmitted Packets Dropped","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Rate of Packets Dropped","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"","value":""},"datasource":"$datasource","hide":2,"includeAll":false,"label":null,"multi":false,"name":"cluster","options":[],"query":"label_values(up{job=\"kube-state-metrics\"}, cluster)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"","value":""},"datasource":"$datasource","hide":0,"includeAll":false,"label":null,"multi":false,"name":"namespace","options":[],"query":"label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"","value":""},"datasource":"$datasource","hide":0,"includeAll":true,"label":null,"multi":false,"name":"type","options":[],"query":"label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\"}, workload_type)","refresh":2,"regex":"","sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"","value":""},"datasource":"$datasource","hide":0,"includeAll":false,"label":null,"multi":false,"name":"workload","options":[],"query":"label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}, workload)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Kubernetes / Compute Resources / Workload","uid":"a164a7f0339f99e89cea5cb47e9be617","version":0} \ No newline at end of file diff --git a/monitor/grafana/apps_v1_Deployment_grafana.yaml b/monitor/grafana/apps_v1_Deployment_grafana.yaml deleted file mode 100644 index 9023121..0000000 --- a/monitor/grafana/apps_v1_Deployment_grafana.yaml +++ /dev/null @@ -1,298 +0,0 @@ -# Source: grafana/templates/deployment.yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: grafana - namespace: vynil-monitor - labels: - helm.sh/chart: grafana-7.2.4 - app.kubernetes.io/name: grafana - app.kubernetes.io/instance: grafana - app.kubernetes.io/version: "10.2.3" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - revisionHistoryLimit: 10 - selector: - matchLabels: - app.kubernetes.io/name: grafana - app.kubernetes.io/instance: grafana - strategy: - type: RollingUpdate - template: - metadata: - labels: - app.kubernetes.io/name: grafana - app.kubernetes.io/instance: grafana - annotations: - checksum/config: 0e9cbd0ea8e24e32f7dfca5bab17a2ba05652642f0a09a4882833ae88e4cc4a3 - checksum/sc-dashboard-provider-config: 593c0a8778b83f11fe80ccb21dfb20bc46705e2be3178df1dc4c89d164c8cd9c - kubectl.kubernetes.io/default-container: grafana - spec: - - serviceAccountName: grafana - automountServiceAccountToken: true - securityContext: - fsGroup: 472 - runAsGroup: 472 - runAsNonRoot: true - runAsUser: 472 - initContainers: - - name: init-chown-data - image: "docker.io/library/busybox:1.31.1" - imagePullPolicy: IfNotPresent - securityContext: - capabilities: - add: - - CHOWN - runAsNonRoot: false - runAsUser: 0 - seccompProfile: - type: RuntimeDefault - command: - - chown - - -R - - 472:472 - - /var/lib/grafana - volumeMounts: - - name: storage - mountPath: "/var/lib/grafana" - enableServiceLinks: true - containers: - - name: grafana-sc-dashboard - image: "quay.io/kiwigrid/k8s-sidecar:1.25.2" - imagePullPolicy: IfNotPresent - env: - - name: METHOD - value: WATCH - - name: LABEL - value: "grafana_dashboard" - - name: FOLDER - value: "/tmp/dashboards" - - name: RESOURCE - value: "both" - - name: REQ_USERNAME - valueFrom: - secretKeyRef: - name: grafana-admin-user - key: username - - name: REQ_PASSWORD - valueFrom: - secretKeyRef: - name: grafana-admin-user - key: password - - name: REQ_URL - value: http://localhost:3000/api/admin/provisioning/dashboards/reload - - name: REQ_METHOD - value: POST - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - seccompProfile: - type: RuntimeDefault - volumeMounts: - - name: sc-dashboard-volume - mountPath: "/tmp/dashboards" - - name: grafana-sc-datasources - image: "quay.io/kiwigrid/k8s-sidecar:1.25.2" - imagePullPolicy: IfNotPresent - env: - - name: METHOD - value: WATCH - - name: LABEL - value: "grafana_datasource" - - name: FOLDER - value: "/etc/grafana/provisioning/datasources" - - name: RESOURCE - value: "both" - - name: REQ_USERNAME - valueFrom: - secretKeyRef: - name: grafana-admin-user - key: username - - name: REQ_PASSWORD - valueFrom: - secretKeyRef: - name: grafana-admin-user - key: password - - name: REQ_URL - value: http://localhost:3000/api/admin/provisioning/datasources/reload - - name: REQ_METHOD - value: POST - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - seccompProfile: - type: RuntimeDefault - volumeMounts: - - name: sc-datasources-volume - mountPath: "/etc/grafana/provisioning/datasources" - - name: grafana-sc-notifiers - image: "quay.io/kiwigrid/k8s-sidecar:1.25.2" - imagePullPolicy: IfNotPresent - env: - - name: METHOD - value: WATCH - - name: LABEL - value: "grafana_notifier" - - name: FOLDER - value: "/etc/grafana/provisioning/notifiers" - - name: RESOURCE - value: "both" - - name: REQ_USERNAME - valueFrom: - secretKeyRef: - name: grafana-admin-user - key: username - - name: REQ_PASSWORD - valueFrom: - secretKeyRef: - name: grafana-admin-user - key: password - - name: REQ_URL - value: http://localhost:3000/api/admin/provisioning/notifications/reload - - name: REQ_METHOD - value: POST - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - seccompProfile: - type: RuntimeDefault - volumeMounts: - - name: sc-notifiers-volume - mountPath: "/etc/grafana/provisioning/notifiers" - - name: grafana-sc-plugins - image: "quay.io/kiwigrid/k8s-sidecar:1.25.2" - imagePullPolicy: IfNotPresent - env: - - name: METHOD - value: WATCH - - name: LABEL - value: "grafana_plugin" - - name: FOLDER - value: "/etc/grafana/provisioning/plugins" - - name: RESOURCE - value: "both" - - name: REQ_USERNAME - valueFrom: - secretKeyRef: - name: grafana-admin-user - key: username - - name: REQ_PASSWORD - valueFrom: - secretKeyRef: - name: grafana-admin-user - key: password - - name: REQ_URL - value: http://localhost:3000/api/admin/provisioning/plugins/reload - - name: REQ_METHOD - value: POST - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - seccompProfile: - type: RuntimeDefault - volumeMounts: - - name: sc-plugins-volume - mountPath: "/etc/grafana/provisioning/plugins" - - name: grafana - image: "docker.io/grafana/grafana:10.2.3" - imagePullPolicy: IfNotPresent - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - seccompProfile: - type: RuntimeDefault - volumeMounts: - - name: config - mountPath: "/etc/grafana/grafana.ini" - subPath: grafana.ini - - name: storage - mountPath: "/var/lib/grafana" - - name: sc-dashboard-volume - mountPath: "/tmp/dashboards" - - name: sc-dashboard-provider - mountPath: "/etc/grafana/provisioning/dashboards/sc-dashboardproviders.yaml" - subPath: provider.yaml - - name: sc-datasources-volume - mountPath: "/etc/grafana/provisioning/datasources" - - name: sc-plugins-volume - mountPath: "/etc/grafana/provisioning/plugins" - - name: sc-notifiers-volume - mountPath: "/etc/grafana/provisioning/notifiers" - ports: - - name: grafana - containerPort: 3000 - protocol: TCP - - name: gossip-tcp - containerPort: 9094 - protocol: TCP - - name: gossip-udp - containerPort: 9094 - protocol: UDP - env: - - name: POD_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - - name: GF_SECURITY_ADMIN_USER - valueFrom: - secretKeyRef: - name: grafana-admin-user - key: username - - name: GF_SECURITY_ADMIN_PASSWORD - valueFrom: - secretKeyRef: - name: grafana-admin-user - key: password - - name: GF_PATHS_DATA - value: /var/lib/grafana/ - - name: GF_PATHS_LOGS - value: /var/log/grafana - - name: GF_PATHS_PLUGINS - value: /var/lib/grafana/plugins - - name: GF_PATHS_PROVISIONING - value: /etc/grafana/provisioning - livenessProbe: - failureThreshold: 10 - httpGet: - path: /api/health - port: 3000 - initialDelaySeconds: 60 - timeoutSeconds: 30 - readinessProbe: - httpGet: - path: /api/health - port: 3000 - volumes: - - name: config - configMap: - name: grafana - - name: storage - persistentVolumeClaim: - claimName: grafana - - name: sc-dashboard-volume - emptyDir: - {} - - name: sc-dashboard-provider - configMap: - name: grafana-config-dashboards - - name: sc-datasources-volume - emptyDir: - {} - - name: sc-plugins-volume - emptyDir: - {} - - name: sc-notifiers-volume - emptyDir: - {} \ No newline at end of file diff --git a/monitor/grafana/config.tf b/monitor/grafana/config.tf deleted file mode 100644 index d10de92..0000000 --- a/monitor/grafana/config.tf +++ /dev/null @@ -1,43 +0,0 @@ -resource "kubernetes_config_map_v1" "config" { - metadata { - name = "grafana" - namespace = var.namespace - labels = local.common-labels - } - data = { - "grafana.ini" = <<-EOF -[analytics] -check_for_updates = true -[grafana_net] -url = https://grafana.net -[log] -mode = console -[paths] -data = /var/lib/grafana/ -logs = /var/log/grafana -plugins = /var/lib/grafana/plugins -provisioning = /etc/grafana/provisioning -[server] -domain = '' -root_url = 'https://${local.dns_name}/' -[users] -auto_assign_org = true -auto_assign_org_id = 1 -[auth] -oauth_allow_insecure_email_lookup = true -signout_redirect_url = '${module.oauth2.sso_signout_url}' -oauth_auto_login = true -[auth.generic_oauth] -enabled = true -name = vynil -scopes = openid profile email -${var.issuer=="letsencrypt-prod"?";":""}tls_client_ca = /etc/local-certs/ca.crt -client_id = '${module.oauth2.client_id}' -client_secret = '${module.oauth2.client_secret}' -auth_url = '${module.oauth2.sso_authorize_url}' -api_url = '${module.oauth2.sso_userinfo_url}' -token_url = '${module.oauth2.sso_token_url}' -role_attribute_path = contains(groups, '${module.application.main_group}-admin') && 'Admin' || contains(groups, '${module.application.main_group}') && 'Editor' || 'Viewer' - EOF - } -} diff --git a/monitor/grafana/datas.tf b/monitor/grafana/datas.tf deleted file mode 100644 index 591b91d..0000000 --- a/monitor/grafana/datas.tf +++ /dev/null @@ -1,112 +0,0 @@ -locals { - authentik_url = "http://authentik.${var.domain}-auth.svc" - authentik_token = data.kubernetes_secret_v1.authentik.data["AUTHENTIK_BOOTSTRAP_TOKEN"] - common-labels = { - "vynil.solidite.fr/owner-name" = var.instance - "vynil.solidite.fr/owner-namespace" = var.namespace - "vynil.solidite.fr/owner-category" = var.category - "vynil.solidite.fr/owner-component" = var.component - "app.kubernetes.io/managed-by" = "vynil" - "app.kubernetes.io/instance" = var.instance - } - pvc_spec = merge({ - "accessModes" = [var.storage.volume.accessMode] - "volumeMode" = var.storage.volume.type - "resources" = { - "requests" = { - "storage" = "${var.storage.volume.size}" - } - } - }, var.storage.volume.class != "" ?{ - "storageClassName" = var.storage.volume.class - }:{}) -} - - -data "kubernetes_secret_v1" "authentik" { - metadata { - name = "authentik" - namespace = "${var.domain}-auth" - } -} - -data "kubernetes_ingress_v1" "authentik" { - metadata { - name = "authentik" - namespace = "${var.domain}-auth" - } -} - -data "kustomization_overlay" "data" { - common_labels = local.common-labels - namespace = var.namespace - resources = [for file in fileset(path.module, "*.yaml"): file if file != "index.yaml" && length(regexall("ClusterRole",file))<1] - images { - name = "docker.io/grafana/grafana" - new_name = "${var.images.grafana.registry}/${var.images.grafana.repository}" - new_tag = "${var.images.grafana.tag}" - } - images { - name = "docker.io/library/busybox" - new_name = "${var.images.busybox.registry}/${var.images.busybox.repository}" - new_tag = "${var.images.busybox.tag}" - } - images { - name = "quay.io/kiwigrid/k8s-sidecar" - new_name = "${var.images.sidecar.registry}/${var.images.sidecar.repository}" - new_tag = "${var.images.sidecar.tag}" - } - patches { - target { - kind = "PersistentVolumeClaim" - name = "grafana" - } - patch = <<-EOF - kind: PersistentVolumeClaim - apiVersion: v1 - metadata: - name: grafana - annotations: - k8up.io/backup: "true" - spec: ${jsonencode(local.pvc_spec)} - EOF - } - patches { - target { - kind = "ServiceMonitor" - name = "grafana" - } - patch = <<-EOF - - op: replace - path: /spec/namespaceSelector/matchNames/0 - value: "${var.namespace}" - EOF - } - patches { - target { - kind = "Deployment" - name = "grafana" - } - patch = <<-EOF - apiVersion: apps/v1 - kind: Deployment - metadata: - name: grafana - annotations: - configmap.reloader.stakater.com/reload: "grafana" - spec: - template: - spec: - containers: - - name: grafana - volumeMounts: - - name: local-certs - mountPath: "/etc/local-certs" - volumes: - - name: local-certs - secret: - secretName: "${var.instance}-cert" - defaultMode: 0444 - EOF - } -} diff --git a/monitor/grafana/monitoring.coreos.com_v1_ServiceMonitor_grafana.yaml b/monitor/grafana/monitoring.coreos.com_v1_ServiceMonitor_grafana.yaml deleted file mode 100644 index f07dbab..0000000 --- a/monitor/grafana/monitoring.coreos.com_v1_ServiceMonitor_grafana.yaml +++ /dev/null @@ -1,28 +0,0 @@ -# Source: grafana/templates/servicemonitor.yaml -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: grafana - namespace: vynil-monitor - labels: - helm.sh/chart: grafana-7.2.4 - app.kubernetes.io/name: grafana - app.kubernetes.io/instance: grafana - app.kubernetes.io/version: "10.2.3" - app.kubernetes.io/managed-by: Helm -spec: - endpoints: - - port: service - interval: 30s - scrapeTimeout: 30s - honorLabels: true - path: /metrics - scheme: http - jobLabel: "grafana" - selector: - matchLabels: - app.kubernetes.io/name: grafana - app.kubernetes.io/instance: grafana - namespaceSelector: - matchNames: - - vynil-monitor \ No newline at end of file diff --git a/monitor/grafana/presentation.tf b/monitor/grafana/presentation.tf deleted file mode 100644 index b38289f..0000000 --- a/monitor/grafana/presentation.tf +++ /dev/null @@ -1,72 +0,0 @@ -locals { - dns_name = "${var.sub_domain}.${var.domain_name}" - dns_names = [local.dns_name] - app_name = var.component == var.instance ? var.instance : format("%s-%s", var.component, var.instance) - icon = "public/img/grafana_icon.svg" - request_headers = { - "Content-Type" = "application/json" - Authorization = "Bearer ${data.kubernetes_secret_v1.authentik.data["AUTHENTIK_BOOTSTRAP_TOKEN"]}" - } - service = { - "name" = "grafana" - "port" = { - "number" = 80 - } - } -} - -module "ingress" { - source = "git::https://git.solidite.fr/vynil/kydah-modules.git//ingress" - component = "" - instance = var.instance - namespace = var.namespace - issuer = var.issuer - ingress_class = var.ingress_class - labels = local.common-labels - dns_names = local.dns_names - middlewares = [] - services = [local.service] - providers = { - kubectl = kubectl - } -} - - module "application" { - source = "git::https://git.solidite.fr/vynil/kydah-modules.git//application" - component = var.component - instance = var.instance - app_group = var.app_group - dns_name = local.dns_name - icon = local.icon - sub_groups = ["admin"] - protocol_provider = module.oauth2.provider-id - providers = { - authentik = authentik - } -} - -module "oauth2" { - source = "git::https://git.solidite.fr/vynil/kydah-modules.git//oauth2" - component = var.component - instance = var.instance - namespace = var.namespace - domain = var.domain - labels = local.common-labels - dns_name = local.dns_name - redirect_path = "login/generic_oauth" - providers = { - kubernetes = kubernetes - kubectl = kubectl - authentik = authentik - } -} - -provider "restapi" { - uri = "http://authentik.${var.domain}-auth.svc/api/v3/" - headers = local.request_headers - create_method = "PATCH" - update_method = "PATCH" - destroy_method = "PATCH" - write_returns_object = true - id_attribute = "name" -} \ No newline at end of file diff --git a/monitor/grafana/rbac.authorization.k8s.io_v1_RoleBinding_grafana.yaml b/monitor/grafana/rbac.authorization.k8s.io_v1_RoleBinding_grafana.yaml deleted file mode 100644 index e5869e0..0000000 --- a/monitor/grafana/rbac.authorization.k8s.io_v1_RoleBinding_grafana.yaml +++ /dev/null @@ -1,20 +0,0 @@ -# Source: grafana/templates/rolebinding.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: grafana - namespace: vynil-monitor - labels: - helm.sh/chart: grafana-7.2.4 - app.kubernetes.io/name: grafana - app.kubernetes.io/instance: grafana - app.kubernetes.io/version: "10.2.3" - app.kubernetes.io/managed-by: Helm -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: grafana -subjects: -- kind: ServiceAccount - name: grafana - namespace: vynil-monitor \ No newline at end of file diff --git a/monitor/grafana/rbac.authorization.k8s.io_v1_Role_grafana.yaml b/monitor/grafana/rbac.authorization.k8s.io_v1_Role_grafana.yaml deleted file mode 100644 index ffd3ae6..0000000 --- a/monitor/grafana/rbac.authorization.k8s.io_v1_Role_grafana.yaml +++ /dev/null @@ -1,16 +0,0 @@ -# Source: grafana/templates/role.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: grafana - namespace: vynil-monitor - labels: - helm.sh/chart: grafana-7.2.4 - app.kubernetes.io/name: grafana - app.kubernetes.io/instance: grafana - app.kubernetes.io/version: "10.2.3" - app.kubernetes.io/managed-by: Helm -rules: - - apiGroups: [""] # "" indicates the core API group - resources: ["configmaps", "secrets"] - verbs: ["get", "watch", "list"] \ No newline at end of file diff --git a/monitor/grafana/secret.tf b/monitor/grafana/secret.tf deleted file mode 100644 index ee72b3e..0000000 --- a/monitor/grafana/secret.tf +++ /dev/null @@ -1,19 +0,0 @@ - -resource "kubectl_manifest" "grafana_secret" { - ignore_fields = ["metadata.annotations"] - yaml_body = <<-EOF - apiVersion: "secretgenerator.mittwald.de/v1alpha1" - kind: "StringSecret" - metadata: - name: "grafana-admin-user" - namespace: "${var.namespace}" - labels: ${jsonencode(local.common-labels)} - spec: - forceRegenerate: false - data: - username: "${var.admin_name}" - fields: - - fieldName: "password" - length: "32" - EOF -} diff --git a/monitor/grafana/v1_ConfigMap_grafana-config-dashboards.yaml b/monitor/grafana/v1_ConfigMap_grafana-config-dashboards.yaml deleted file mode 100644 index 1013796..0000000 --- a/monitor/grafana/v1_ConfigMap_grafana-config-dashboards.yaml +++ /dev/null @@ -1,26 +0,0 @@ -# Source: grafana/templates/configmap-dashboard-provider.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - labels: - helm.sh/chart: grafana-7.2.4 - app.kubernetes.io/name: grafana - app.kubernetes.io/instance: grafana - app.kubernetes.io/version: "10.2.3" - app.kubernetes.io/managed-by: Helm - name: grafana-config-dashboards - namespace: vynil-monitor -data: - provider.yaml: |- - apiVersion: 1 - providers: - - name: 'sidecarProvider' - orgId: 1 - folder: '' - type: file - disableDeletion: false - allowUiUpdates: false - updateIntervalSeconds: 30 - options: - foldersFromFilesStructure: false - path: /tmp/dashboards \ No newline at end of file diff --git a/monitor/grafana/v1_PersistentVolumeClaim_grafana.yaml b/monitor/grafana/v1_PersistentVolumeClaim_grafana.yaml deleted file mode 100644 index ca79b05..0000000 --- a/monitor/grafana/v1_PersistentVolumeClaim_grafana.yaml +++ /dev/null @@ -1,20 +0,0 @@ -# Source: grafana/templates/pvc.yaml -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: grafana - namespace: vynil-monitor - labels: - helm.sh/chart: grafana-7.2.4 - app.kubernetes.io/name: grafana - app.kubernetes.io/instance: grafana - app.kubernetes.io/version: "10.2.3" - app.kubernetes.io/managed-by: Helm - finalizers: - - kubernetes.io/pvc-protection -spec: - accessModes: - - "ReadWriteOnce" - resources: - requests: - storage: "10Gi" \ No newline at end of file diff --git a/monitor/grafana/v1_ServiceAccount_grafana.yaml b/monitor/grafana/v1_ServiceAccount_grafana.yaml deleted file mode 100644 index ea06bc8..0000000 --- a/monitor/grafana/v1_ServiceAccount_grafana.yaml +++ /dev/null @@ -1,13 +0,0 @@ ---- -# Source: grafana/templates/serviceaccount.yaml -apiVersion: v1 -kind: ServiceAccount -metadata: - labels: - helm.sh/chart: grafana-7.2.4 - app.kubernetes.io/name: grafana - app.kubernetes.io/instance: grafana - app.kubernetes.io/version: "10.2.3" - app.kubernetes.io/managed-by: Helm - name: grafana - namespace: vynil-monitor \ No newline at end of file diff --git a/monitor/grafana/v1_Service_grafana.yaml b/monitor/grafana/v1_Service_grafana.yaml deleted file mode 100644 index e6da791..0000000 --- a/monitor/grafana/v1_Service_grafana.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Source: grafana/templates/service.yaml -apiVersion: v1 -kind: Service -metadata: - name: grafana - namespace: vynil-monitor - labels: - helm.sh/chart: grafana-7.2.4 - app.kubernetes.io/name: grafana - app.kubernetes.io/instance: grafana - app.kubernetes.io/version: "10.2.3" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - name: service - port: 80 - protocol: TCP - targetPort: 3000 - selector: - app.kubernetes.io/name: grafana - app.kubernetes.io/instance: grafana \ No newline at end of file diff --git a/monitor/kube-state-metrics/apps_v1_Deployment_kube-state-metrics.yaml b/monitor/kube-state-metrics/apps_v1_Deployment_kube-state-metrics.yaml deleted file mode 100644 index 2c580f9..0000000 --- a/monitor/kube-state-metrics/apps_v1_Deployment_kube-state-metrics.yaml +++ /dev/null @@ -1,82 +0,0 @@ -# Source: kube-prometheus-stack/charts/kube-state-metrics/templates/deployment.yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: kube-state-metrics - namespace: vynil-monitor - labels: - helm.sh/chart: kube-state-metrics-5.16.0 - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: metrics - app.kubernetes.io/part-of: kube-state-metrics - app.kubernetes.io/name: kube-state-metrics - app.kubernetes.io/instance: kube-state-metrics - app.kubernetes.io/version: "2.10.1" - release: kube-state-metrics -spec: - selector: - matchLabels: - app.kubernetes.io/name: kube-state-metrics - app.kubernetes.io/instance: kube-state-metrics - replicas: 1 - strategy: - type: RollingUpdate - revisionHistoryLimit: 10 - template: - metadata: - labels: - helm.sh/chart: kube-state-metrics-5.16.0 - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: metrics - app.kubernetes.io/part-of: kube-state-metrics - app.kubernetes.io/name: kube-state-metrics - app.kubernetes.io/instance: kube-state-metrics - app.kubernetes.io/version: "2.10.1" - release: kube-state-metrics - spec: - hostNetwork: false - serviceAccountName: kube-state-metrics - securityContext: - fsGroup: 65534 - runAsGroup: 65534 - runAsNonRoot: true - runAsUser: 65534 - seccompProfile: - type: RuntimeDefault - containers: - - name: kube-state-metrics - args: - - --port=8080 - - --resources=certificatesigningrequests,configmaps,cronjobs,daemonsets,deployments,endpoints,horizontalpodautoscalers,ingresses,jobs,leases,limitranges,mutatingwebhookconfigurations,namespaces,networkpolicies,nodes,persistentvolumeclaims,persistentvolumes,poddisruptionbudgets,pods,replicasets,replicationcontrollers,resourcequotas,secrets,services,statefulsets,storageclasses,validatingwebhookconfigurations,volumeattachments - imagePullPolicy: IfNotPresent - image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.10.1 - ports: - - containerPort: 8080 - name: "http" - livenessProbe: - failureThreshold: 3 - httpGet: - httpHeaders: - path: /healthz - port: 8080 - scheme: HTTP - initialDelaySeconds: 5 - periodSeconds: 10 - successThreshold: 1 - timeoutSeconds: 5 - readinessProbe: - failureThreshold: 3 - httpGet: - httpHeaders: - path: / - port: 8080 - scheme: HTTP - initialDelaySeconds: 5 - periodSeconds: 10 - successThreshold: 1 - timeoutSeconds: 5 - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL \ No newline at end of file diff --git a/monitor/kube-state-metrics/datas.tf b/monitor/kube-state-metrics/datas.tf deleted file mode 100644 index f26ff7e..0000000 --- a/monitor/kube-state-metrics/datas.tf +++ /dev/null @@ -1,49 +0,0 @@ -locals { - common-labels = { - "vynil.solidite.fr/owner-name" = var.instance - "vynil.solidite.fr/owner-namespace" = var.namespace - "vynil.solidite.fr/owner-category" = var.category - "vynil.solidite.fr/owner-component" = var.component - "app.kubernetes.io/managed-by" = "vynil" - "app.kubernetes.io/instance" = var.instance - } - rb-patch = <<-EOF - - op: replace - path: /subjects/0/namespace - value: "${var.namespace}" - EOF -} - -data "kustomization_overlay" "data" { - common_labels = local.common-labels - namespace = var.namespace - resources = [for file in fileset(path.module, "*.yaml"): file if file != "index.yaml" && length(regexall("ClusterRole",file))<1] - images { - name = "registry.k8s.io/kube-state-metrics/kube-state-metrics" - new_name = "${var.images.kube-state-metrics.registry}/${var.images.kube-state-metrics.repository}" - new_tag = "${var.images.kube-state-metrics.tag}" - } - patches { - target { - kind = "ServiceMonitor" - name = "kube-state-metrics" - } - patch = <<-EOF - - op: replace - path: /spec/selector/matchLabels/app.kubernetes.io~1instance - value: "${var.instance}" - EOF - } -} - -data "kustomization_overlay" "data_no_ns" { - common_labels = local.common-labels - resources = [for file in fileset(path.module, "*.yaml"): file if length(regexall("ClusterRole",file))>0] - patches { - target { - kind = "ClusterRoleBinding" - name = "kube-state-metrics" - } - patch = local.rb-patch - } -} diff --git a/monitor/kube-state-metrics/index.yaml b/monitor/kube-state-metrics/index.yaml deleted file mode 100644 index 11a8a72..0000000 --- a/monitor/kube-state-metrics/index.yaml +++ /dev/null @@ -1,57 +0,0 @@ ---- -apiVersion: vinyl.solidite.fr/v1beta1 -kind: Component -category: monitor -metadata: - name: kube-state-metrics - description: null -options: - images: - default: - kube-state-metrics: - pullPolicy: IfNotPresent - registry: registry.k8s.io - repository: kube-state-metrics/kube-state-metrics - tag: v2.10.1 - examples: - - kube-state-metrics: - pullPolicy: IfNotPresent - registry: registry.k8s.io - repository: kube-state-metrics/kube-state-metrics - tag: v2.10.1 - properties: - kube-state-metrics: - default: - pullPolicy: IfNotPresent - registry: registry.k8s.io - repository: kube-state-metrics/kube-state-metrics - tag: v2.10.1 - properties: - pullPolicy: - default: IfNotPresent - enum: - - Always - - Never - - IfNotPresent - type: string - registry: - default: registry.k8s.io - type: string - repository: - default: kube-state-metrics/kube-state-metrics - type: string - tag: - default: v2.10.1 - type: string - type: object - type: object -dependencies: [] -providers: - kubernetes: true - authentik: null - kubectl: true - postgresql: null - restapi: null - http: null - gitea: null -tfaddtype: null diff --git a/monitor/kube-state-metrics/monitoring.coreos.com_v1_PrometheusRule_kube-state-metrics-kube-pr-kube-state-metrics.yaml b/monitor/kube-state-metrics/monitoring.coreos.com_v1_PrometheusRule_kube-state-metrics-kube-pr-kube-state-metrics.yaml deleted file mode 100644 index 663750b..0000000 --- a/monitor/kube-state-metrics/monitoring.coreos.com_v1_PrometheusRule_kube-state-metrics-kube-pr-kube-state-metrics.yaml +++ /dev/null @@ -1,68 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kube-state-metrics.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: kube-state-metrics-kube-pr-kube-state-metrics - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: kube-state-metrics - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "kube-state-metrics" - heritage: "Helm" -spec: - groups: - - name: kube-state-metrics - rules: - - alert: KubeStateMetricsListErrors - annotations: - description: kube-state-metrics is experiencing errors at an elevated rate in list operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricslisterrors - summary: kube-state-metrics is experiencing errors in list operations. - expr: |- - (sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m])) by (cluster) - / - sum(rate(kube_state_metrics_list_total{job="kube-state-metrics"}[5m])) by (cluster)) - > 0.01 - for: 15m - labels: - severity: critical - - alert: KubeStateMetricsWatchErrors - annotations: - description: kube-state-metrics is experiencing errors at an elevated rate in watch operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricswatcherrors - summary: kube-state-metrics is experiencing errors in watch operations. - expr: |- - (sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics",result="error"}[5m])) by (cluster) - / - sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics"}[5m])) by (cluster)) - > 0.01 - for: 15m - labels: - severity: critical - - alert: KubeStateMetricsShardingMismatch - annotations: - description: kube-state-metrics pods are running with different --total-shards configuration, some Kubernetes objects may be exposed multiple times or not exposed at all. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricsshardingmismatch - summary: kube-state-metrics sharding is misconfigured. - expr: stdvar (kube_state_metrics_total_shards{job="kube-state-metrics"}) by (cluster) != 0 - for: 15m - labels: - severity: critical - - alert: KubeStateMetricsShardsMissing - annotations: - description: kube-state-metrics shards are missing, some Kubernetes objects are not being exposed. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricsshardsmissing - summary: kube-state-metrics shards are missing. - expr: |- - 2^max(kube_state_metrics_total_shards{job="kube-state-metrics"}) by (cluster) - 1 - - - sum( 2 ^ max by (cluster, shard_ordinal) (kube_state_metrics_shard_ordinal{job="kube-state-metrics"}) ) by (cluster) - != 0 - for: 15m - labels: - severity: critical \ No newline at end of file diff --git a/monitor/kube-state-metrics/monitoring.coreos.com_v1_ServiceMonitor_kube-state-metrics.yaml b/monitor/kube-state-metrics/monitoring.coreos.com_v1_ServiceMonitor_kube-state-metrics.yaml deleted file mode 100644 index 5034788..0000000 --- a/monitor/kube-state-metrics/monitoring.coreos.com_v1_ServiceMonitor_kube-state-metrics.yaml +++ /dev/null @@ -1,24 +0,0 @@ -# Source: kube-prometheus-stack/charts/kube-state-metrics/templates/servicemonitor.yaml -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: kube-state-metrics - namespace: vynil-monitor - labels: - helm.sh/chart: kube-state-metrics-5.16.0 - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: metrics - app.kubernetes.io/part-of: kube-state-metrics - app.kubernetes.io/name: kube-state-metrics - app.kubernetes.io/instance: kube-state-metrics - app.kubernetes.io/version: "2.10.1" - release: kube-state-metrics -spec: - jobLabel: app.kubernetes.io/name - selector: - matchLabels: - app.kubernetes.io/name: kube-state-metrics - app.kubernetes.io/instance: kube-state-metrics - endpoints: - - port: http - honorLabels: true \ No newline at end of file diff --git a/monitor/kube-state-metrics/rbac.authorization.k8s.io_v1_ClusterRoleBinding_kube-state-metrics.yaml b/monitor/kube-state-metrics/rbac.authorization.k8s.io_v1_ClusterRoleBinding_kube-state-metrics.yaml deleted file mode 100644 index 1b5d050..0000000 --- a/monitor/kube-state-metrics/rbac.authorization.k8s.io_v1_ClusterRoleBinding_kube-state-metrics.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Source: kube-prometheus-stack/charts/kube-state-metrics/templates/clusterrolebinding.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - labels: - helm.sh/chart: kube-state-metrics-5.16.0 - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: metrics - app.kubernetes.io/part-of: kube-state-metrics - app.kubernetes.io/name: kube-state-metrics - app.kubernetes.io/instance: kube-state-metrics - app.kubernetes.io/version: "2.10.1" - release: kube-state-metrics - name: kube-state-metrics -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: kube-state-metrics -subjects: -- kind: ServiceAccount - name: kube-state-metrics - namespace: vynil-monitor \ No newline at end of file diff --git a/monitor/kube-state-metrics/rbac.authorization.k8s.io_v1_ClusterRole_kube-state-metrics.yaml b/monitor/kube-state-metrics/rbac.authorization.k8s.io_v1_ClusterRole_kube-state-metrics.yaml deleted file mode 100644 index f335187..0000000 --- a/monitor/kube-state-metrics/rbac.authorization.k8s.io_v1_ClusterRole_kube-state-metrics.yaml +++ /dev/null @@ -1,155 +0,0 @@ -# Source: kube-prometheus-stack/charts/kube-state-metrics/templates/role.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - labels: - helm.sh/chart: kube-state-metrics-5.16.0 - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: metrics - app.kubernetes.io/part-of: kube-state-metrics - app.kubernetes.io/name: kube-state-metrics - app.kubernetes.io/instance: kube-state-metrics - app.kubernetes.io/version: "2.10.1" - release: kube-state-metrics - name: kube-state-metrics -rules: - -- apiGroups: ["certificates.k8s.io"] - resources: - - certificatesigningrequests - verbs: ["list", "watch"] - -- apiGroups: [""] - resources: - - configmaps - verbs: ["list", "watch"] - -- apiGroups: ["batch"] - resources: - - cronjobs - verbs: ["list", "watch"] - -- apiGroups: ["extensions", "apps"] - resources: - - daemonsets - verbs: ["list", "watch"] - -- apiGroups: ["extensions", "apps"] - resources: - - deployments - verbs: ["list", "watch"] - -- apiGroups: [""] - resources: - - endpoints - verbs: ["list", "watch"] - -- apiGroups: ["autoscaling"] - resources: - - horizontalpodautoscalers - verbs: ["list", "watch"] - -- apiGroups: ["extensions", "networking.k8s.io"] - resources: - - ingresses - verbs: ["list", "watch"] - -- apiGroups: ["batch"] - resources: - - jobs - verbs: ["list", "watch"] - -- apiGroups: ["coordination.k8s.io"] - resources: - - leases - verbs: ["list", "watch"] - -- apiGroups: [""] - resources: - - limitranges - verbs: ["list", "watch"] - -- apiGroups: ["admissionregistration.k8s.io"] - resources: - - mutatingwebhookconfigurations - verbs: ["list", "watch"] - -- apiGroups: [""] - resources: - - namespaces - verbs: ["list", "watch"] - -- apiGroups: ["networking.k8s.io"] - resources: - - networkpolicies - verbs: ["list", "watch"] - -- apiGroups: [""] - resources: - - nodes - verbs: ["list", "watch"] - -- apiGroups: [""] - resources: - - persistentvolumeclaims - verbs: ["list", "watch"] - -- apiGroups: [""] - resources: - - persistentvolumes - verbs: ["list", "watch"] - -- apiGroups: ["policy"] - resources: - - poddisruptionbudgets - verbs: ["list", "watch"] - -- apiGroups: [""] - resources: - - pods - verbs: ["list", "watch"] - -- apiGroups: ["extensions", "apps"] - resources: - - replicasets - verbs: ["list", "watch"] - -- apiGroups: [""] - resources: - - replicationcontrollers - verbs: ["list", "watch"] - -- apiGroups: [""] - resources: - - resourcequotas - verbs: ["list", "watch"] - -- apiGroups: [""] - resources: - - secrets - verbs: ["list", "watch"] - -- apiGroups: [""] - resources: - - services - verbs: ["list", "watch"] - -- apiGroups: ["apps"] - resources: - - statefulsets - verbs: ["list", "watch"] - -- apiGroups: ["storage.k8s.io"] - resources: - - storageclasses - verbs: ["list", "watch"] - -- apiGroups: ["admissionregistration.k8s.io"] - resources: - - validatingwebhookconfigurations - verbs: ["list", "watch"] - -- apiGroups: ["storage.k8s.io"] - resources: - - volumeattachments - verbs: ["list", "watch"] \ No newline at end of file diff --git a/monitor/kube-state-metrics/ressources_no_ns.tf b/monitor/kube-state-metrics/ressources_no_ns.tf deleted file mode 100644 index 9fa58b7..0000000 --- a/monitor/kube-state-metrics/ressources_no_ns.tf +++ /dev/null @@ -1,45 +0,0 @@ - -# first loop through resources in ids_prio[0] -resource "kustomization_resource" "pre_no_ns" { - for_each = data.kustomization_overlay.data_no_ns.ids_prio[0] - - manifest = ( - contains(["_/Secret"], regex("(?P.*/.*)/.*/.*", each.value)["group_kind"]) - ? sensitive(data.kustomization_overlay.data_no_ns.manifests[each.value]) - : data.kustomization_overlay.data_no_ns.manifests[each.value] - ) -} - -# then loop through resources in ids_prio[1] -# and set an explicit depends_on on kustomization_resource.pre -# wait 2 minutes for any deployment or daemonset to become ready -resource "kustomization_resource" "main_no_ns" { - for_each = data.kustomization_overlay.data_no_ns.ids_prio[1] - - manifest = ( - contains(["_/Secret"], regex("(?P.*/.*)/.*/.*", each.value)["group_kind"]) - ? sensitive(data.kustomization_overlay.data_no_ns.manifests[each.value]) - : data.kustomization_overlay.data_no_ns.manifests[each.value] - ) - wait = true - timeouts { - create = "5m" - update = "5m" - } - - depends_on = [kustomization_resource.pre_no_ns] -} - -# finally, loop through resources in ids_prio[2] -# and set an explicit depends_on on kustomization_resource.main -resource "kustomization_resource" "post_no_ns" { - for_each = data.kustomization_overlay.data_no_ns.ids_prio[2] - - manifest = ( - contains(["_/Secret"], regex("(?P.*/.*)/.*/.*", each.value)["group_kind"]) - ? sensitive(data.kustomization_overlay.data_no_ns.manifests[each.value]) - : data.kustomization_overlay.data_no_ns.manifests[each.value] - ) - - depends_on = [kustomization_resource.main_no_ns] -} diff --git a/monitor/kube-state-metrics/v1_ServiceAccount_kube-state-metrics.yaml b/monitor/kube-state-metrics/v1_ServiceAccount_kube-state-metrics.yaml deleted file mode 100644 index d275f41..0000000 --- a/monitor/kube-state-metrics/v1_ServiceAccount_kube-state-metrics.yaml +++ /dev/null @@ -1,16 +0,0 @@ ---- -# Source: kube-prometheus-stack/charts/kube-state-metrics/templates/serviceaccount.yaml -apiVersion: v1 -kind: ServiceAccount -metadata: - labels: - helm.sh/chart: kube-state-metrics-5.16.0 - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: metrics - app.kubernetes.io/part-of: kube-state-metrics - app.kubernetes.io/name: kube-state-metrics - app.kubernetes.io/instance: kube-state-metrics - app.kubernetes.io/version: "2.10.1" - release: kube-state-metrics - name: kube-state-metrics - namespace: vynil-monitor \ No newline at end of file diff --git a/monitor/kube-state-metrics/v1_Service_kube-state-metrics.yaml b/monitor/kube-state-metrics/v1_Service_kube-state-metrics.yaml deleted file mode 100644 index 56e2a69..0000000 --- a/monitor/kube-state-metrics/v1_Service_kube-state-metrics.yaml +++ /dev/null @@ -1,28 +0,0 @@ -# Source: kube-prometheus-stack/charts/kube-state-metrics/templates/service.yaml -apiVersion: v1 -kind: Service -metadata: - name: kube-state-metrics - namespace: vynil-monitor - labels: - helm.sh/chart: kube-state-metrics-5.16.0 - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: metrics - app.kubernetes.io/part-of: kube-state-metrics - app.kubernetes.io/name: kube-state-metrics - app.kubernetes.io/instance: kube-state-metrics - app.kubernetes.io/version: "2.10.1" - release: kube-state-metrics - annotations: - prometheus.io/scrape: 'true' -spec: - type: "ClusterIP" - ports: - - name: "http" - protocol: TCP - port: 8080 - targetPort: 8080 - - selector: - app.kubernetes.io/name: kube-state-metrics - app.kubernetes.io/instance: kube-state-metrics \ No newline at end of file diff --git a/monitor/loki-dashboard/index.yaml b/monitor/loki-dashboard/index.yaml deleted file mode 100644 index d1c0f24..0000000 --- a/monitor/loki-dashboard/index.yaml +++ /dev/null @@ -1,82 +0,0 @@ ---- -apiVersion: vinyl.solidite.fr/v1beta1 -kind: Component -category: monitor -metadata: - name: loki-dashboard - description: null -options: - ingress_class: - default: traefik - examples: - - traefik - type: string - issuer: - default: letsencrypt-prod - examples: - - letsencrypt-prod - type: string - domain: - default: your-company - examples: - - your-company - type: string - images: - default: - operator: - pullPolicy: IfNotPresent - registry: docker.io - repository: to-be/defined - tag: v1.0.0 - examples: - - operator: - pullPolicy: IfNotPresent - registry: docker.io - repository: to-be/defined - tag: v1.0.0 - properties: - operator: - default: - pullPolicy: IfNotPresent - registry: docker.io - repository: to-be/defined - tag: v1.0.0 - properties: - pullPolicy: - default: IfNotPresent - enum: - - Always - - Never - - IfNotPresent - type: string - registry: - default: docker.io - type: string - repository: - default: to-be/defined - type: string - tag: - default: v1.0.0 - type: string - type: object - type: object - sub_domain: - default: to-be-set - examples: - - to-be-set - type: string - domain_name: - default: your_company.com - examples: - - your_company.com - type: string -dependencies: [] -providers: - kubernetes: true - authentik: true - kubectl: true - postgresql: null - restapi: null - http: null - gitea: null -tfaddtype: null diff --git a/monitor/loki-dashboard/v1_ConfigMap_loki-dashboards-1.yaml b/monitor/loki-dashboard/v1_ConfigMap_loki-dashboards-1.yaml deleted file mode 100644 index 5d9de03..0000000 --- a/monitor/loki-dashboard/v1_ConfigMap_loki-dashboards-1.yaml +++ /dev/null @@ -1,24 +0,0 @@ -# Source: loki/templates/monitoring/dashboards/configmap-1.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: loki-dashboards-1 - namespace: vynil-monitor - labels: - helm.sh/chart: loki-5.41.8 - app.kubernetes.io/name: loki - app.kubernetes.io/instance: loki - app.kubernetes.io/version: "2.9.3" - app.kubernetes.io/managed-by: Helm - grafana_dashboard: "1" -data: - "loki-chunks.json": | - {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[{"asDropdown":true,"icon":"external link","includeVars":true,"keepTime":true,"tags":["loki"],"targetBlank":false,"title":"Loki Dashboards","type":"dashboards"}],"refresh":"10s","rows":[{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":1,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(loki_ingester_memory_chunks{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"})","format":"time_series","intervalFactor":2,"legendFormat":"series","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Series","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":2,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(loki_ingester_memory_chunks{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}) / sum(loki_ingester_memory_streams{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"})","format":"time_series","intervalFactor":2,"legendFormat":"chunks","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Chunks per series","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Active Series / Chunks","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":3,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1","format":"time_series","intervalFactor":2,"legendFormat":"99th Percentile","refId":"A","step":10},{"expr":"histogram_quantile(0.50, sum(rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1","format":"time_series","intervalFactor":2,"legendFormat":"50th Percentile","refId":"B","step":10},{"expr":"sum(rate(loki_ingester_chunk_utilization_sum{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) * 1 / sum(rate(loki_ingester_chunk_utilization_count{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"Average","refId":"C","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Utilization","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":4,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(loki_ingester_chunk_age_seconds_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"99th Percentile","refId":"A","step":10},{"expr":"histogram_quantile(0.50, sum(rate(loki_ingester_chunk_age_seconds_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"50th Percentile","refId":"B","step":10},{"expr":"sum(rate(loki_ingester_chunk_age_seconds_sum{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) * 1e3 / sum(rate(loki_ingester_chunk_age_seconds_count{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"Average","refId":"C","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Age","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ms","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Flush Stats","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":5,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(loki_ingester_chunk_entries_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1","format":"time_series","intervalFactor":2,"legendFormat":"99th Percentile","refId":"A","step":10},{"expr":"histogram_quantile(0.50, sum(rate(loki_ingester_chunk_entries_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1","format":"time_series","intervalFactor":2,"legendFormat":"50th Percentile","refId":"B","step":10},{"expr":"sum(rate(loki_ingester_chunk_entries_sum{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) * 1 / sum(rate(loki_ingester_chunk_entries_count{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"Average","refId":"C","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Log Entries Per Chunk","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":6,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_chunk_store_index_entries_per_chunk_sum{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m])) / sum(rate(loki_chunk_store_index_entries_per_chunk_count{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m]))","format":"time_series","intervalFactor":2,"legendFormat":"Index Entries","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Index Entries Per Chunk","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Flush Stats","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":7,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"loki_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"} or cortex_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Queue Length","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"1xx":"#EAB839","2xx":"#7EB26D","3xx":"#6ED0E0","4xx":"#EF843C","5xx":"#E24D42","error":"#E24D42","success":"#7EB26D"},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":8,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by (status) (\n label_replace(label_replace(rate(loki_ingester_chunk_age_seconds_count{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n","format":"time_series","intervalFactor":2,"legendFormat":"{{status}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Flush Rate","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Flush Stats","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":9,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Chunks Flushed/Second","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":10,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by (reason) (rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) / ignoring(reason) group_left sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"{{reason}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Chunk Flush Reason","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":1,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":1,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Flush Stats","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"cards":{"cardPadding":null,"cardRound":null},"color":{"cardColor":"#b4ff00","colorScale":"sqrt","colorScheme":"interpolateSpectral","exponent":0.5,"mode":"spectrum"},"dataFormat":"tsbuckets","datasource":"$datasource","heatmap":{},"hideZeroBuckets":false,"highlightCards":true,"id":11,"legend":{"show":true},"span":12,"targets":[{"expr":"sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))","format":"heatmap","intervalFactor":2,"legendFormat":"{{le}}","refId":"A"}],"title":"Chunk Utilization","tooltip":{"show":true,"showHistogram":true},"type":"heatmap","xAxis":{"show":true},"xBucketNumber":null,"xBucketSize":null,"yAxis":{"decimals":0,"format":"percentunit","show":true,"splitFactor":null},"yBucketBound":"auto"}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Utilization","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"cards":{"cardPadding":null,"cardRound":null},"color":{"cardColor":"#b4ff00","colorScale":"sqrt","colorScheme":"interpolateSpectral","exponent":0.5,"mode":"spectrum"},"dataFormat":"tsbuckets","datasource":"$datasource","heatmap":{},"hideZeroBuckets":false,"highlightCards":true,"id":12,"legend":{"show":true},"span":12,"targets":[{"expr":"sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)","format":"heatmap","intervalFactor":2,"legendFormat":"{{le}}","refId":"A"}],"title":"Chunk Size Bytes","tooltip":{"show":true,"showHistogram":true},"type":"heatmap","xAxis":{"show":true},"xBucketNumber":null,"xBucketSize":null,"yAxis":{"decimals":0,"format":"bytes","show":true,"splitFactor":null},"yBucketBound":"auto"}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Utilization","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":13,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[1m])) by (le))","format":"time_series","intervalFactor":2,"legendFormat":"p99","legendLink":null,"step":10},{"expr":"histogram_quantile(0.90, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[1m])) by (le))","format":"time_series","intervalFactor":2,"legendFormat":"p90","legendLink":null,"step":10},{"expr":"histogram_quantile(0.50, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[1m])) by (le))","format":"time_series","intervalFactor":2,"legendFormat":"p50","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Chunk Size Quantiles","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Utilization","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":14,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.5, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m])) by (le))","format":"time_series","intervalFactor":2,"legendFormat":"p50","legendLink":null,"step":10},{"expr":"histogram_quantile(0.99, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m])) by (le))","format":"time_series","intervalFactor":2,"legendFormat":"p99","legendLink":null,"step":10},{"expr":"sum(rate(loki_ingester_chunk_bounds_hours_sum{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m])) / sum(rate(loki_ingester_chunk_bounds_hours_count{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m]))","format":"time_series","intervalFactor":2,"legendFormat":"avg","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Chunk Duration hours (end-start)","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Duration","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["loki"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(loki_build_info, cluster)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"namespace","multi":false,"name":"namespace","options":[],"query":"label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"utc","title":"Loki / Chunks","uid":"chunks","version":0} - "loki-deletion.json": | - {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[{"asDropdown":true,"icon":"external link","includeVars":true,"keepTime":true,"tags":["loki"],"targetBlank":false,"title":"Loki Dashboards","type":"dashboards"}],"refresh":"10s","rows":[{"collapse":false,"height":"100px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"none","id":1,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(loki_compactor_pending_delete_requests_count{cluster=~\"$cluster\", namespace=~\"$namespace\"})","format":"time_series","instant":true,"intervalFactor":2,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"Number of Pending Requests","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"dtdurations","id":2,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"max(loki_compactor_oldest_pending_delete_request_age_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"})","format":"time_series","instant":true,"intervalFactor":2,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"Oldest Pending Request Age","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Headlines","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":3,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(increase(loki_compactor_delete_requests_received_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[1d]))","format":"time_series","intervalFactor":2,"legendFormat":"received","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Delete Requests Received / Day","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":4,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(increase(loki_compactor_delete_requests_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[1d]))","format":"time_series","intervalFactor":2,"legendFormat":"processed","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Delete Requests Processed / Day","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Churn","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":5,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(increase(loki_compactor_load_pending_requests_attempts_total{status=\"fail\", cluster=~\"$cluster\", namespace=~\"$namespace\"}[1h]))","format":"time_series","intervalFactor":2,"legendFormat":"failures","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Failures in Loading Delete Requests / Hour","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Failures","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":6,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_compactor_deleted_lines{cluster=~\"$cluster\",job=~\"$namespace/(loki|enterprise-logs)-read\"}[$__rate_interval])) by (user)","format":"time_series","intervalFactor":2,"legendFormat":"{{user}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Lines Deleted / Sec","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Deleted lines","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["loki"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(loki_build_info, cluster)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"namespace","multi":false,"name":"namespace","options":[],"query":"label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"utc","title":"Loki / Deletion","uid":"deletion","version":0} - "loki-logs.json": | - {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"id":8,"iteration":1583185057230,"links":[{"asDropdown":true,"icon":"external link","includeVars":true,"keepTime":true,"tags":["loki"],"targetBlank":false,"title":"Loki Dashboards","type":"dashboards"}],"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":4,"w":3,"x":0,"y":0},"hiddenSeries":false,"id":35,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(go_goroutines{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"})","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"goroutines","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"timeseries","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":4,"w":3,"x":3,"y":0},"hiddenSeries":false,"id":41,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(go_gc_duration_seconds{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}) by (quantile)","legendFormat":"{{quantile}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"gc duration","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"timeseries","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":4,"w":3,"x":6,"y":0},"hiddenSeries":false,"id":36,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(container_cpu_usage_seconds_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"}[5m]))","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"cpu","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"timeseries","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":4,"w":3,"x":9,"y":0},"hiddenSeries":false,"id":40,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"})","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"working set","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"timeseries","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":4,"w":3,"x":12,"y":0},"hiddenSeries":false,"id":38,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}[5m]))","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"tx","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"timeseries","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":4,"w":3,"x":15,"y":0},"hiddenSeries":false,"id":39,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}[5m]))","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"rx","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"timeseries","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"decbytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":4,"w":3,"x":18,"y":0},"hiddenSeries":false,"id":37,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"increase(kube_pod_container_status_last_terminated_reason{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"}[30m]) \u003e 0","legendFormat":"{{reason}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"restarts","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"timeseries","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":4,"w":3,"x":21,"y":0},"hiddenSeries":false,"id":42,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(promtail_custom_bad_words_total{cluster=\"$cluster\", exported_namespace=\"$namespace\", exported_pod=~\"$deployment.*\", exported_pod=~\"$pod\", container=~\"$container\"}[5m])) by (level)","legendFormat":"{{level}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"bad words","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"timeseries","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$logs","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":24,"x":0,"y":4},"hiddenSeries":false,"id":31,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"warn","color":"#FF780A"},{"alias":"error","color":"#E02F44"},{"alias":"info","color":"#56A64B"},{"alias":"debug","color":"#3274D9"}],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\" } |logfmt| level=~\"$level\" |= \"$filter\" [5m])) by (level)","intervalFactor":3,"legendFormat":"{{level}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Log Rate","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"timeseries","xaxis":{"buckets":null,"mode":"time","name":null,"show":false,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}],"yaxis":{"align":false,"alignLevel":null}},{"datasource":"$logs","gridPos":{"h":19,"w":24,"x":0,"y":6},"id":29,"maxDataPoints":"","options":{"showLabels":false,"showTime":true,"sortOrder":"Descending","wrapLogMessage":true},"targets":[{"expr":"{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"} | logfmt | level=~\"$level\" |= \"$filter\"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Logs","type":"logs"}],"refresh":"10s","rows":[],"schemaVersion":14,"style":"dark","tags":["loki"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(loki_build_info, cluster)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"namespace","multi":false,"name":"namespace","options":[],"query":"label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"hide":0,"label":null,"name":"logs","options":[],"query":"loki","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{},"datasource":"$datasource","hide":0,"includeAll":false,"label":null,"multi":false,"name":"deployment","options":[],"query":"label_values(kube_deployment_created{cluster=\"$cluster\", namespace=\"$namespace\"}, deployment)","refresh":0,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{},"datasource":"$datasource","hide":0,"includeAll":false,"label":null,"multi":false,"name":"pod","options":[],"query":"label_values(kube_pod_container_info{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\"}, pod)","refresh":0,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{},"datasource":"$datasource","hide":0,"includeAll":false,"label":null,"multi":false,"name":"container","options":[],"query":"label_values(kube_pod_container_info{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\", pod=~\"$deployment.*\"}, container)","refresh":0,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"selected":true,"text":"","value":""},"hide":0,"includeAll":false,"label":"","multi":true,"name":"level","options":[{"selected":false,"text":"debug","value":"debug"},{"selected":false,"text":"info","value":"info"},{"selected":false,"text":"warn","value":"warn"},{"selected":false,"text":"error","value":"error"}],"query":"debug,info,warn,error","refresh":0,"type":"custom"},{"current":{"selected":false,"text":"","value":""},"label":"LogQL Filter","name":"filter","query":"","type":"textbox"}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"utc","title":"Loki / Logs","uid":"logs","version":0} - "loki-mixin-recording-rules.json": | - {"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations \u0026 Alerts","target":{"limit":100,"matchAny":false,"tags":[],"type":"dashboard"},"type":"dashboard"},{"datasource":"${datasource}","enable":false,"expr":"sum by (tenant) (changes(loki_ruler_wal_prometheus_tsdb_wal_truncations_total{tenant=~\"${tenant}\"}[$__rate_interval]))","iconColor":"red","name":"WAL Truncations","target":{"queryType":"Azure Monitor","refId":"Anno"},"titleFormat":"{{tenant}}"}]},"editable":true,"fiscalYearStartMonth":0,"gnetId":null,"graphTooltip":0,"iteration":1635347545534,"links":[],"liveNow":false,"panels":[{"datasource":"${datasource}","fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"mappings":[],"noValue":"0","thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":1}]}},"overrides":[]},"gridPos":{"h":10,"w":2,"x":0,"y":0},"id":2,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"8.3.0-38205pre","targets":[{"datasource":"${datasource}","exemplar":false,"expr":"sum(loki_ruler_wal_appender_ready) by (pod, tenant) == 0","instant":true,"interval":"","legendFormat":"","refId":"A"}],"title":"Appenders Not Ready","type":"stat"},{"datasource":"${datasource}","description":"","fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":10,"w":11,"x":2,"y":0},"id":4,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"targets":[{"datasource":"${datasource}","exemplar":true,"expr":"sum(rate(loki_ruler_wal_samples_appended_total{tenant=~\"${tenant}\"}[$__rate_interval])) by (tenant) \u003e 0","interval":"","legendFormat":"{{tenant}}","refId":"A"}],"title":"Samples Appended to WAL per Second","type":"timeseries"},{"datasource":"${datasource}","description":"Series are unique combinations of labels","fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":10,"w":11,"x":13,"y":0},"id":5,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"targets":[{"datasource":"${datasource}","exemplar":true,"expr":"sum(rate(loki_ruler_wal_storage_created_series_total{tenant=~\"${tenant}\"}[$__rate_interval])) by (tenant) \u003e 0","interval":"","legendFormat":"{{tenant}}","refId":"A"}],"title":"Series Created per Second","type":"timeseries"},{"datasource":"${datasource}","description":"Difference between highest timestamp appended to WAL and highest timestamp successfully written to remote storage","fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":10,"w":12,"x":0,"y":10},"id":6,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"targets":[{"datasource":"${datasource}","exemplar":true,"expr":"loki_ruler_wal_prometheus_remote_storage_highest_timestamp_in_seconds{tenant=~\"${tenant}\"}\n- on (tenant)\n (\n loki_ruler_wal_prometheus_remote_storage_queue_highest_sent_timestamp_seconds{tenant=~\"${tenant}\"}\n or vector(0)\n )","interval":"","legendFormat":"{{tenant}}","refId":"A"}],"title":"Write Behind","type":"timeseries"},{"datasource":"${datasource}","description":"","fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":10,"w":12,"x":12,"y":10},"id":7,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"targets":[{"datasource":"${datasource}","exemplar":true,"expr":"sum(rate(loki_ruler_wal_prometheus_remote_storage_samples_total{tenant=~\"${tenant}\"}[$__rate_interval])) by (tenant) \u003e 0","interval":"","legendFormat":"{{tenant}}","refId":"A"}],"title":"Samples Sent per Second","type":"timeseries"},{"datasource":"${datasource}","description":"\n","fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":10,"w":12,"x":0,"y":20},"id":8,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"targets":[{"datasource":"${datasource}","exemplar":true,"expr":"sum by (tenant) (loki_ruler_wal_disk_size{tenant=~\"${tenant}\"})","interval":"","legendFormat":"{{tenant}}","refId":"A"}],"title":"WAL Disk Size","type":"timeseries"},{"datasource":"${datasource}","description":"Some number of pending samples is expected, but if remote-write is failing this value will remain high","fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":10,"w":12,"x":12,"y":20},"id":9,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"targets":[{"datasource":"${datasource}","exemplar":true,"expr":"max(loki_ruler_wal_prometheus_remote_storage_samples_pending{tenant=~\"${tenant}\"}) by (tenant,pod) \u003e 0","interval":"","legendFormat":"{{tenant}}","refId":"A"}],"title":"Pending Samples","type":"timeseries"}],"schemaVersion":31,"style":"dark","tags":[],"templating":{"list":[{"description":null,"error":null,"hide":0,"includeAll":false,"label":"Datasource","multi":false,"name":"datasource","options":[],"query":"prometheus","queryValue":"","refresh":1,"regex":"","skipUrlSync":false,"type":"datasource"},{"allValue":null,"datasource":"${datasource}","definition":"label_values(loki_ruler_wal_samples_appended_total, tenant)","description":null,"error":null,"hide":0,"includeAll":true,"label":"Tenant","multi":true,"name":"tenant","options":[],"query":{"query":"label_values(loki_ruler_wal_samples_appended_total, tenant)","refId":"StandardVariableQuery"},"refresh":2,"regex":"","skipUrlSync":false,"sort":0,"type":"query"}]},"time":{"from":"now-6h","to":"now"},"timepicker":{},"timezone":"","title":"Recording Rules","uid":"2xKA_ZK7k","version":9,"weekStart":""} - "loki-operational.json": | - {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"id":68,"iteration":1588704280892,"links":[{"asDropdown":true,"icon":"external link","includeVars":true,"keepTime":true,"tags":["loki"],"targetBlank":false,"title":"Loki Dashboards","type":"dashboards"}],"panels":[{"collapsed":false,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":17,"panels":[],"targets":[],"title":"Main","type":"row"},{"aliasColors":{"5xx":"red"},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":5,"w":4,"x":0,"y":1},"hiddenSeries":false,"id":6,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster=\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\"}[5m]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", \"([a-z]+)\")\n)","legendFormat":"{{status}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Queries/Second","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":10,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{"5xx":"red"},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":5,"w":4,"x":4,"y":1},"hiddenSeries":false,"id":7,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster=\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push\"}[5m]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))","legendFormat":"{{status}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Pushes/Second","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":10,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":5,"w":4,"x":12,"y":1},"hiddenSeries":false,"id":2,"interval":"","legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"topk(10, sum(rate(loki_distributor_lines_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (tenant))","legendFormat":"{{tenant}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Lines Per Tenant (top 10)","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":5,"w":4,"x":16,"y":1},"hiddenSeries":false,"id":4,"legend":{"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"topk(10, sum(rate(loki_distributor_bytes_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (tenant)) / 1024 / 1024","legendFormat":"{{tenant}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"MBs Per Tenant (Top 10)","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":5,"w":4,"x":20,"y":1},"hiddenSeries":false,"id":24,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"increase(kube_pod_container_status_restarts_total{cluster=\"$cluster\", namespace=\"$namespace\"}[10m]) \u003e 0","hide":false,"interval":"","legendFormat":"{{container}}-{{pod}}","refId":"B"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Container Restarts","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":10,"w":12,"x":0,"y":6},"hiddenSeries":false,"id":9,"legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum by (le) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3","legendFormat":".99","refId":"A"},{"expr":"histogram_quantile(0.75, sum by (le) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3","legendFormat":".9","refId":"B"},{"expr":"histogram_quantile(0.5, sum by (le) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3","legendFormat":".5","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Push Latency","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":5,"w":6,"x":12,"y":6},"hiddenSeries":false,"id":12,"legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum by (le) (job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3","legendFormat":".99","refId":"A"},{"expr":"histogram_quantile(0.9, sum by (le) (job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3","legendFormat":".9","refId":"B"},{"expr":"histogram_quantile(0.5, sum by (le) (job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3","legendFormat":".5","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Distributor Latency","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":0,"fillGradient":0,"gridPos":{"h":5,"w":6,"x":18,"y":6},"hiddenSeries":false,"id":71,"legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\"}[5m])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}[5m])) by (route)","interval":"","legendFormat":"{{route}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Distributor Success Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"decimals":null,"format":"percentunit","label":"","logBase":1,"max":"1","min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":5,"w":6,"x":12,"y":11},"hiddenSeries":false,"id":13,"legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum by (le) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3","legendFormat":".99","refId":"A"},{"expr":"histogram_quantile(0.9, sum by (le) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3","hide":false,"legendFormat":".9","refId":"B"},{"expr":"histogram_quantile(0.5, sum by (le) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3","hide":false,"legendFormat":".5","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Ingester Latency Write","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":0,"fillGradient":0,"gridPos":{"h":5,"w":6,"x":18,"y":11},"hiddenSeries":false,"id":72,"legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\", route=\"/logproto.Pusher/Push\"}[5m])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\"}[5m])) by (route)","interval":"","legendFormat":"{{route}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Ingester Success Rate Write","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"decimals":null,"format":"percentunit","label":"","logBase":1,"max":"1","min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":10,"w":12,"x":0,"y":16},"hiddenSeries":false,"id":10,"legend":{"alignAsTable":true,"avg":false,"current":false,"hideEmpty":true,"hideZero":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))","legendFormat":"{{route}}-.99","refId":"A"},{"expr":"histogram_quantile(0.9, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))","legendFormat":"{{route}}-.9","refId":"B"},{"expr":"histogram_quantile(0.5, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))","legendFormat":"{{route}}-.5","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Query Latency","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":5,"w":6,"x":12,"y":16},"hiddenSeries":false,"id":14,"legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3","legendFormat":".99-{{route}}","refId":"A"},{"expr":"histogram_quantile(0.9, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3","legendFormat":".9-{{route}}","refId":"B"},{"expr":"histogram_quantile(0.5, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3","legendFormat":".5-{{route}}","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Querier Latency","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":0,"fillGradient":0,"gridPos":{"h":5,"w":6,"x":18,"y":16},"hiddenSeries":false,"id":73,"legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\", status_code!~\"5[0-9]{2}\"}[5m])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\"}[5m])) by (route)","interval":"","legendFormat":"{{route}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Querier Success Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"decimals":null,"format":"percentunit","label":"","logBase":1,"max":"1","min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","description":"","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":5,"w":6,"x":12,"y":21},"hiddenSeries":false,"id":15,"legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3","legendFormat":".99-{{route}}","refId":"A"},{"expr":"histogram_quantile(0.9, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3","legendFormat":".9-{{route}}","refId":"B"},{"expr":"histogram_quantile(0.5, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3","legendFormat":".5-{{route}}","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Ingester Latency Read","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":0,"fillGradient":0,"gridPos":{"h":5,"w":6,"x":18,"y":21},"hiddenSeries":false,"id":74,"legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[5m])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[5m])) by (route)","interval":"","legendFormat":"{{route}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Ingester Success Rate Read","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"decimals":null,"format":"percentunit","label":"","logBase":1,"max":"1","min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":26},"id":110,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":12,"x":0,"y":27},"hiddenSeries":false,"id":112,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"topk(10,sum by (tenant, reason) (rate(loki_discarded_samples_total{cluster=\"$cluster\",namespace=\"$namespace\"}[1m])))","interval":"","legendFormat":"{{ tenant }} - {{ reason }}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Discarded Lines","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"columns":[],"datasource":"$datasource","fontSize":"100%","gridPos":{"h":8,"w":12,"x":12,"y":27},"id":113,"pageSize":null,"panels":[],"showHeader":true,"sort":{"col":3,"desc":true},"styles":[{"alias":"Time","align":"auto","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"","align":"auto","colorMode":null,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"mappingType":1,"pattern":"tenant","thresholds":[],"type":"string","unit":"short"},{"alias":"","align":"auto","colorMode":null,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"mappingType":1,"pattern":"reason","thresholds":[],"type":"number","unit":"short"},{"alias":"","align":"right","colorMode":null,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"decimals":2,"pattern":"/.*/","thresholds":[],"type":"number","unit":"short"}],"targets":[{"expr":"topk(10, sum by (tenant, reason) (sum_over_time(increase(loki_discarded_samples_total{cluster=\"$cluster\",namespace=\"$namespace\"}[1m])[$__range:1m])))","format":"table","instant":true,"interval":"","legendFormat":"{{ tenant }} - {{ reason }}","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Discarded Lines Per Interval","transform":"table","type":"table-old"}],"targets":[],"title":"Limits","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":27},"id":23,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":0,"y":28},"hiddenSeries":false,"id":26,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":false,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":true,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(loki|enterprise-logs)-write.*\"}","intervalFactor":3,"legendFormat":"{{pod}}-{{container}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"CPU Usage","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":6,"y":28},"hiddenSeries":false,"id":27,"legend":{"avg":false,"current":false,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":false,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":true,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(loki|enterprise-logs)-write.*\"}","instant":false,"intervalFactor":3,"legendFormat":"{{pod}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Memory Usage","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":true,"dashLength":10,"dashes":false,"datasource":"$logs","fill":1,"fillGradient":0,"gridPos":{"h":4,"w":12,"x":12,"y":28},"hiddenSeries":false,"id":31,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":false,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"{}","color":"#C4162A"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\"} | logfmt | level=\"error\"[1m]))","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Error Log Rate","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":false,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}],"yaxis":{"align":false,"alignLevel":null}},{"datasource":"$logs","gridPos":{"h":18,"w":12,"x":12,"y":32},"id":29,"options":{"showLabels":false,"showTime":false,"sortOrder":"Descending","wrapLogMessage":true},"panels":[],"targets":[{"expr":"{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\"} | logfmt | level=\"error\"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Logs","type":"logs"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":0,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":0,"y":35},"hiddenSeries":false,"id":33,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\"}[5m])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}[5m])) by (route)","interval":"","intervalFactor":1,"legendFormat":"{{route}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Success Rate","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":6,"y":35},"hiddenSeries":false,"id":32,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_distributor_ingester_append_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (ingester)","intervalFactor":1,"legendFormat":"{{ingester}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Append Failures By Ingester","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":0,"y":42},"hiddenSeries":false,"id":34,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_distributor_bytes_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (pod)","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Bytes Received/Second","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":6,"y":42},"hiddenSeries":false,"id":35,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_distributor_lines_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (pod)","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Lines Received/Second","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"targets":[],"title":"Write Path","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":29},"id":104,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":12,"x":0,"y":30},"hiddenSeries":false,"id":106,"legend":{"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"topk(10,sum by (tenant) (loki_ingester_memory_streams{cluster=\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\"}))","interval":"","legendFormat":"{{ tenant }}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Active Streams","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":12,"x":12,"y":30},"hiddenSeries":false,"id":108,"legend":{"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"topk(10, sum by (tenant) (rate(loki_ingester_streams_created_total{cluster=\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\"}[1m]) \u003e 0))","interval":"","legendFormat":"{{ tenant }}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Streams Created/Sec","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"targets":[],"title":"Streams","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":30},"id":94,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":12,"x":0,"y":31},"hiddenSeries":false,"id":102,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"De-Dupe Ratio","yaxis":2}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\"}[1m]))","interval":"","legendFormat":"Chunks","refId":"A"},{"expr":"sum(increase(loki_chunk_store_deduped_chunks_total{cluster=\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}[1m]))/sum(increase(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}[1m])) \u003c 1","interval":"","legendFormat":"De-Dupe Ratio","refId":"B"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Chunks Flushed/Sec","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"cards":{"cardPadding":null,"cardRound":null},"color":{"cardColor":"#b4ff00","colorScale":"sqrt","colorScheme":"interpolateSpectral","exponent":0.5,"mode":"spectrum"},"dataFormat":"tsbuckets","datasource":"$datasource","gridPos":{"h":8,"w":12,"x":12,"y":31},"heatmap":{},"hideZeroBuckets":false,"highlightCards":true,"id":100,"legend":{"show":true},"panels":[],"reverseYBuckets":false,"targets":[{"expr":"sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\"}[1m])) by (le)","format":"heatmap","instant":false,"interval":"","legendFormat":"{{ le }}","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Chunk Size Bytes","tooltip":{"show":true,"showHistogram":false},"type":"heatmap","xAxis":{"show":true},"xBucketNumber":null,"xBucketSize":null,"yAxis":{"decimals":0,"format":"bytes","logBase":1,"max":null,"min":null,"show":true,"splitFactor":null},"yBucketBound":"auto","yBucketNumber":null,"yBucketSize":null},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":7,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":0,"y":39},"hiddenSeries":false,"id":96,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by(reason) (rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"$namespace/ingester\", namespace=~\"$namespace\"}[$__rate_interval])) / ignoring(reason) group_left sum(rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"$namespace/ingester\", namespace=~\"$namespace\"}[$__rate_interval]))","interval":"","legendFormat":"{{ reason }}"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Chunk Flush Reason %","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":"1","min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"cards":{"cardPadding":null,"cardRound":null},"color":{"cardColor":"#b4ff00","colorScale":"sqrt","colorScheme":"interpolateSpectral","exponent":0.5,"max":null,"min":null,"mode":"spectrum"},"dataFormat":"tsbuckets","datasource":"$datasource","gridPos":{"h":9,"w":12,"x":12,"y":39},"heatmap":{},"hideZeroBuckets":true,"highlightCards":true,"id":98,"legend":{"show":true},"panels":[],"reverseYBuckets":false,"targets":[{"expr":"sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}[1m]))","format":"heatmap","instant":false,"interval":"","legendFormat":"{{ le }}","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Chunk Utilization","tooltip":{"show":true,"showHistogram":false},"type":"heatmap","xAxis":{"show":true},"xBucketNumber":null,"xBucketSize":null,"yAxis":{"decimals":0,"format":"percentunit","logBase":1,"max":null,"min":null,"show":true,"splitFactor":null},"yBucketBound":"auto","yBucketNumber":null,"yBucketSize":null}],"targets":[],"title":"Chunks","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":31},"id":64,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":0,"y":32},"hiddenSeries":false,"id":68,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":false,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":true,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(loki|enterprise-logs)-read.*\"}","intervalFactor":3,"legendFormat":"{{pod}}-{{container}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"CPU Usage","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":0,"y":39},"hiddenSeries":false,"id":69,"legend":{"avg":false,"current":false,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":false,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":true,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(loki|enterprise-logs)-read.*\"}","instant":false,"intervalFactor":3,"legendFormat":"{{pod}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Memory Usage","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":true,"dashLength":10,"dashes":false,"datasource":"$logs","fill":1,"fillGradient":0,"gridPos":{"h":3,"w":18,"x":12,"y":32},"hiddenSeries":false,"id":65,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":false,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"{}","color":"#F2495C"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\"} | logfmt | level=\"error\"[1m]))","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Error Log Rate","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":false,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}],"yaxis":{"align":false,"alignLevel":null}},{"datasource":"$logs","gridPos":{"h":18,"w":18,"x":12,"y":35},"id":66,"options":{"showLabels":false,"showTime":false,"sortOrder":"Descending","wrapLogMessage":true},"panels":[],"targets":[{"expr":"{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\"} | logfmt | level=\"error\"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Logs","type":"logs"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":0,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":0,"y":46},"hiddenSeries":false,"id":70,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\", status_code!~\"5[0-9]{2}\"}[1m])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\"}[1m])) by (route)","interval":"","intervalFactor":1,"legendFormat":"{{route}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Success Rate","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"targets":[],"title":"Read Path","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":32},"id":52,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":30},"hiddenSeries":false,"id":53,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (method, name, le, container))","intervalFactor":1,"legendFormat":"{{container}}: .99-{{method}}-{{name}}","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (method, name, le, container))","hide":false,"legendFormat":"{{container}}: .9-{{method}}-{{name}}","refId":"B"},{"expr":"histogram_quantile(.5, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (method, name, le, container))","hide":false,"legendFormat":"{{container}}: .5-{{method}}-{{name}}","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Latency By Method","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":38},"hiddenSeries":false,"id":54,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_memcache_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, method, name, container)","intervalFactor":1,"legendFormat":"{{container}}: {{status_code}}-{{method}}-{{name}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Status By Method","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"targets":[],"title":"Memcached","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":33},"id":57,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":31},"hiddenSeries":false,"id":55,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","intervalFactor":1,"legendFormat":".99-{{operation}}","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".9-{{operation}}","refId":"B"},{"expr":"histogram_quantile(.5, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".5-{{operation}}","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Latency By Operation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":39},"hiddenSeries":false,"id":58,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, status_code, method)","intervalFactor":1,"legendFormat":"{{status_code}}-{{operation}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Status By Operation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"targets":[],"title":"Consul","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":34},"id":43,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":0,"y":9},"hiddenSeries":false,"id":41,"interval":"","legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (operation, le))","intervalFactor":1,"legendFormat":".9","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (operation, le))","refId":"B"},{"expr":"histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (operation, le))","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"MutateRows Latency","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":6,"y":9},"hiddenSeries":false,"id":46,"interval":"","legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (operation, le))","interval":"","intervalFactor":1,"legendFormat":"99%","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (operation, le))","interval":"","legendFormat":"90%","refId":"B"},{"expr":"histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (operation, le))","interval":"","legendFormat":"50%","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"ReadRows Latency","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":12,"y":9},"hiddenSeries":false,"id":44,"interval":"","legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (operation, le))","interval":"","intervalFactor":1,"legendFormat":"99%","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (operation, le))","interval":"","legendFormat":"90%","refId":"B"},{"expr":"histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (operation, le))","interval":"","legendFormat":"50%","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"GetTable Latency","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":18,"y":9},"hiddenSeries":false,"id":45,"interval":"","legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (operation, le))","intervalFactor":1,"legendFormat":".9","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (operation, le))","refId":"B"},{"expr":"histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (operation, le))","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"ListTables Latency","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":0,"y":16},"hiddenSeries":false,"id":47,"interval":"","legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (status_code)","intervalFactor":1,"legendFormat":"{{status_code}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"MutateRows Status","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":6,"y":16},"hiddenSeries":false,"id":50,"interval":"","legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (status_code)","intervalFactor":1,"legendFormat":"{{status_code}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"ReadRows Status","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":12,"y":16},"hiddenSeries":false,"id":48,"interval":"","legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (status_code)","intervalFactor":1,"legendFormat":"{{status_code}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"GetTable Status","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":18,"y":16},"hiddenSeries":false,"id":49,"interval":"","legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (status_code)","intervalFactor":1,"legendFormat":"{{status_code}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"ListTables Status","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"targets":[],"title":"Big Table","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":35},"id":60,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":33},"hiddenSeries":false,"id":61,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","intervalFactor":1,"legendFormat":".99-{{operation}}","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".9-{{operation}}","refId":"B"},{"expr":"histogram_quantile(.5, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".5-{{operation}}","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Latency By Operation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":41},"hiddenSeries":false,"id":62,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_gcs_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)","intervalFactor":1,"legendFormat":"{{status_code}}-{{operation}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Status By Method","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"targets":[],"title":"GCS","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":36},"id":76,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fill":1,"fillGradient":0,"gridPos":{"h":6,"w":6,"x":0,"y":9},"id":82,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(cortex_dynamo_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Failure Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fill":1,"fillGradient":0,"gridPos":{"h":6,"w":6,"x":6,"y":9},"id":83,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(cortex_dynamo_consumed_capacity_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Consumed Capacity Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fill":1,"fillGradient":0,"gridPos":{"h":6,"w":6,"x":12,"y":9},"id":84,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(cortex_dynamo_throttled_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Throttled Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fill":1,"fillGradient":0,"gridPos":{"h":6,"w":6,"x":18,"y":9},"id":85,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(cortex_dynamo_dropped_requests_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Dropped Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fill":1,"fillGradient":0,"gridPos":{"h":6,"w":6,"x":0,"y":15},"id":86,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, sum(rate(cortex_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))","legendFormat":".99","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(cortex_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))","legendFormat":".9","refId":"B"},{"expr":"histogram_quantile(.5, sum(rate(cortex_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))","legendFormat":".5","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Query Pages","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":6,"w":9,"x":6,"y":15},"id":87,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, sum(rate(cortex_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","intervalFactor":1,"legendFormat":".99-{{operation}}","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(cortex_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".9-{{operation}}","refId":"B"},{"expr":"histogram_quantile(.5, sum(rate(cortex_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".5-{{operation}}","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Latency By Operation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":6,"w":9,"x":15,"y":15},"id":88,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(cortex_dynamo_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)","intervalFactor":1,"legendFormat":"{{status_code}}-{{operation}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Status By Method","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"targets":[],"title":"Dynamo","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":37},"id":78,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":10},"id":79,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","intervalFactor":1,"legendFormat":".99-{{operation}}","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".9-{{operation}}","refId":"B"},{"expr":"histogram_quantile(.5, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".5-{{operation}}","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Latency By Operation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":18},"id":80,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_s3_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)","intervalFactor":1,"legendFormat":"{{status_code}}-{{operation}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Status By Method","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"targets":[],"title":"S3","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":37},"id":78,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":10},"id":79,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","intervalFactor":1,"legendFormat":".99-{{operation}}","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".9-{{operation}}","refId":"B"},{"expr":"histogram_quantile(.5, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".5-{{operation}}","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Latency By Operation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":18},"id":80,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_azure_blob_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)","intervalFactor":1,"legendFormat":"{{status_code}}-{{operation}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Status By Method","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"targets":[],"title":"Azure Blob","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":37},"id":114,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":10},"id":115,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","intervalFactor":1,"legendFormat":".99-{{operation}}","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".9-{{operation}}","refId":"B"},{"expr":"histogram_quantile(.5, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".5-{{operation}}","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Latency By Operation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":18},"id":116,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)","intervalFactor":1,"legendFormat":"{{status_code}}-{{operation}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Status By Method","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"targets":[],"title":"BoltDB Shipper","type":"row"}],"refresh":"10s","rows":[],"schemaVersion":14,"style":"dark","tags":["loki"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"hide":0,"label":null,"name":"logs","options":[],"query":"loki","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(loki_build_info, cluster)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"namespace","multi":false,"name":"namespace","options":[],"query":"label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"utc","title":"Loki / Operational","uid":"operational","version":0} \ No newline at end of file diff --git a/monitor/loki-dashboard/v1_ConfigMap_loki-dashboards-2.yaml b/monitor/loki-dashboard/v1_ConfigMap_loki-dashboards-2.yaml deleted file mode 100644 index 1fd43eb..0000000 --- a/monitor/loki-dashboard/v1_ConfigMap_loki-dashboards-2.yaml +++ /dev/null @@ -1,24 +0,0 @@ -# Source: loki/templates/monitoring/dashboards/configmap-2.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: loki-dashboards-2 - namespace: vynil-monitor - labels: - helm.sh/chart: loki-5.41.8 - app.kubernetes.io/name: loki - app.kubernetes.io/instance: loki - app.kubernetes.io/version: "2.9.3" - app.kubernetes.io/managed-by: Helm - grafana_dashboard: "1" -data: - "loki-reads-resources.json": | - {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[{"asDropdown":true,"icon":"external link","includeVars":true,"keepTime":true,"tags":["loki"],"targetBlank":false,"title":"Loki Dashboards","type":"dashboards"}],"refresh":"10s","rows":[{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"gridPos":{},"id":1,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"limit","color":"#E02F44","fill":0}],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10},{"expr":"min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"})","format":"time_series","intervalFactor":2,"legendFormat":"limit","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"gridPos":{},"id":2,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"limit","color":"#E02F44","fill":0}],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10},{"expr":"min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"} \u003e 0)","format":"time_series","intervalFactor":2,"legendFormat":"limit","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory (workingset)","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"gridPos":{},"id":3,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory (go heap inuse)","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"gridPos":{},"id":4,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}} - {{device}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk Writes","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"gridPos":{},"id":5,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}} - {{device}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk Reads","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"gridPos":{},"id":6,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"(loki|enterprise-logs)-read.*\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{persistentvolumeclaim}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk Space Utilization","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"gridPos":{},"id":7,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"loki_boltdb_shipper_query_readiness_duration_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"}","format":"time_series","intervalFactor":2,"legendFormat":"duration","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Query Readiness Duration","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Read path","titleSize":"h6","type":"row"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":8,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"limit","color":"#E02F44","fill":0}],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10},{"expr":"min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"})","format":"time_series","intervalFactor":2,"legendFormat":"limit","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":9,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"limit","color":"#E02F44","fill":0}],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10},{"expr":"min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"} \u003e 0)","format":"time_series","intervalFactor":2,"legendFormat":"limit","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory (workingset)","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":10,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory (go heap inuse)","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Ingester","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["loki"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(loki_build_info, cluster)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"namespace","multi":false,"name":"namespace","options":[],"query":"label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"utc","title":"Loki / Reads Resources","uid":"reads-resources","version":0} - "loki-reads.json": | - {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[{"asDropdown":true,"icon":"external link","includeVars":true,"keepTime":true,"tags":["loki"],"targetBlank":false,"title":"Loki Dashboards","type":"dashboards"}],"refresh":"10s","rows":[{"collapse":false,"height":"250px","panels":[{"aliasColors":{"1xx":"#EAB839","2xx":"#7EB26D","3xx":"#6ED0E0","4xx":"#EF843C","5xx":"#E24D42","error":"#E24D42","success":"#7EB26D"},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":1,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n","format":"time_series","intervalFactor":2,"legendFormat":"{{status}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"QPS","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":2,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\", cluster=~\"$cluster\"})) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"{{ route }} 99th Percentile","refId":"A","step":10},{"expr":"histogram_quantile(0.50, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\", cluster=~\"$cluster\"})) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"{{ route }} 50th Percentile","refId":"B","step":10},{"expr":"1e3 * sum(job_route:loki_request_duration_seconds_sum:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\", cluster=~\"$cluster\"}) by (route) / sum(job_route:loki_request_duration_seconds_count:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\", cluster=~\"$cluster\"}) by (route) ","format":"time_series","intervalFactor":2,"legendFormat":"{{ route }} Average","refId":"C","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Latency","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ms","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Read Path","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{"1xx":"#EAB839","2xx":"#7EB26D","3xx":"#6ED0E0","4xx":"#EF843C","5xx":"#E24D42","error":"#E24D42","success":"#7EB26D"},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":3,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by (status) (\n label_replace(label_replace(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-read\", operation=\"Shipper.Query\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n","format":"time_series","intervalFactor":2,"legendFormat":"{{status}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"QPS","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":4,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-read\", operation=\"Shipper.Query\"}[$__rate_interval])) by (le)) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"99th Percentile","refId":"A","step":10},{"expr":"histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-read\", operation=\"Shipper.Query\"}[$__rate_interval])) by (le)) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"50th Percentile","refId":"B","step":10},{"expr":"sum(rate(loki_boltdb_shipper_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-read\", operation=\"Shipper.Query\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-read\", operation=\"Shipper.Query\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"Average","refId":"C","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Latency","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ms","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"BoltDB Shipper","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["loki"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(loki_build_info, cluster)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"namespace","multi":false,"name":"namespace","options":[],"query":"label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"utc","title":"Loki / Reads","uid":"reads","version":0} - "loki-retention.json": | - {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[{"asDropdown":true,"icon":"external link","includeVars":true,"keepTime":true,"tags":["loki"],"targetBlank":false,"title":"Loki Dashboards","type":"dashboards"}],"refresh":"10s","rows":[{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":1,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"limit","color":"#E02F44","fill":0}],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10},{"expr":"min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"})","format":"time_series","intervalFactor":2,"legendFormat":"limit","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":2,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"limit","color":"#E02F44","fill":0}],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10},{"expr":"min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"} \u003e 0)","format":"time_series","intervalFactor":2,"legendFormat":"limit","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory (workingset)","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":3,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory (go heap inuse)","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Resource Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"color":{"fixedColor":"blue","mode":"fixed"},"custom":{},"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null}]},"unit":"dateTimeFromNow"}},"fill":1,"id":4,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"text":{},"textMode":"auto"},"percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"} * 1e3","format":"time_series","instant":true,"refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Last Compact and Mark Operation Success","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"stat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":5,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"loki_boltdb_shipper_compact_tables_operation_duration_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"}","format":"time_series","intervalFactor":2,"legendFormat":"duration","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Compact and Mark Operations Duration","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":6,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by (status)(rate(loki_boltdb_shipper_compact_tables_operation_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"{{success}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Compact and Mark Operations Per Status","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Compact and Mark","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":7,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":true,"steppedLine":false,"targets":[{"expr":"count by(action)(loki_boltdb_shipper_retention_marker_table_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{action}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Processed Tables Per Action","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":8,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":true,"steppedLine":false,"targets":[{"expr":"count by(table,action)(loki_boltdb_shipper_retention_marker_table_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\" , action=~\"modified|deleted\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{table}}-{{action}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Modified Tables","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":9,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by (table)(rate(loki_boltdb_shipper_retention_marker_count_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) \u003e0","format":"time_series","intervalFactor":2,"legendFormat":"{{table}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Marks Creation Rate Per Table","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Per Table Marker","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"short","id":10,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum (increase(loki_boltdb_shipper_retention_marker_count_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[24h]))","format":"time_series","instant":true,"intervalFactor":2,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"Marked Chunks (24h)","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":11,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"99th Percentile","refId":"A","step":10},{"expr":"histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"50th Percentile","refId":"B","step":10},{"expr":"sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"Average","refId":"C","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Mark Table Latency","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ms","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"short","id":12,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum (increase(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[24h]))","format":"time_series","instant":true,"intervalFactor":2,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"Delete Chunks (24h)","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":13,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"99th Percentile","refId":"A","step":10},{"expr":"histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"50th Percentile","refId":"B","step":10},{"expr":"sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"Average","refId":"C","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Delete Latency","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ms","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Sweeper","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":14,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"time() - (loki_boltdb_shipper_retention_sweeper_marker_file_processing_current_time{cluster=~\"$cluster\", namespace=~\"$namespace\"} \u003e 0)","format":"time_series","intervalFactor":2,"legendFormat":"lag","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Sweeper Lag","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":15,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(loki_boltdb_shipper_retention_sweeper_marker_files_current{cluster=~\"$cluster\", namespace=~\"$namespace\"})","format":"time_series","intervalFactor":2,"legendFormat":"count","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Marks Files to Process","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":16,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by (status)(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"{{status}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Delete Rate Per Status","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"datasource":"$logs","id":17,"span":12,"targets":[{"expr":"{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\"}","refId":"A"}],"title":"Compactor Logs","type":"logs"}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Logs","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["loki"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(loki_build_info, cluster)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"namespace","multi":false,"name":"namespace","options":[],"query":"label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"hide":0,"label":null,"name":"logs","options":[],"query":"loki","refresh":1,"regex":"","type":"datasource"}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"utc","title":"Loki / Retention","uid":"retention","version":0} - "loki-writes-resources.json": | - {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[{"asDropdown":true,"icon":"external link","includeVars":true,"keepTime":true,"tags":["loki"],"targetBlank":false,"title":"Loki Dashboards","type":"dashboards"}],"refresh":"10s","rows":[{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"gridPos":{},"id":1,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(pod) (loki_ingester_memory_streams{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"In-memory streams","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"gridPos":{},"id":2,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"limit","color":"#E02F44","fill":0}],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10},{"expr":"min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"})","format":"time_series","intervalFactor":2,"legendFormat":"limit","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"gridPos":{},"id":3,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"limit","color":"#E02F44","fill":0}],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10},{"expr":"min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"} \u003e 0)","format":"time_series","intervalFactor":2,"legendFormat":"limit","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory (workingset)","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"gridPos":{},"id":4,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory (go heap inuse)","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"gridPos":{},"id":5,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}} - {{device}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk Writes","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"gridPos":{},"id":6,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}} - {{device}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk Reads","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"gridPos":{},"id":7,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"(loki|enterprise-logs)-write.*\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{persistentvolumeclaim}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk Space Utilization","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Write path","titleSize":"h6","type":"row"}],"schemaVersion":14,"style":"dark","tags":["loki"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(loki_build_info, cluster)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"namespace","multi":false,"name":"namespace","options":[],"query":"label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"utc","title":"Loki / Writes Resources","uid":"writes-resources","version":0} - "loki-writes.json": | - {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[{"asDropdown":true,"icon":"external link","includeVars":true,"keepTime":true,"tags":["loki"],"targetBlank":false,"title":"Loki Dashboards","type":"dashboards"}],"refresh":"10s","rows":[{"collapse":false,"height":"250px","panels":[{"aliasColors":{"1xx":"#EAB839","2xx":"#7EB26D","3xx":"#6ED0E0","4xx":"#EF843C","5xx":"#E24D42","error":"#E24D42","success":"#7EB26D"},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":1,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n","format":"time_series","intervalFactor":2,"legendFormat":"{{status}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"QPS","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":2,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum by (le) (job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"99th Percentile","refId":"A","step":10},{"expr":"histogram_quantile(0.50, sum by (le) (job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"50th Percentile","refId":"B","step":10},{"expr":"1e3 * sum(job:loki_request_duration_seconds_sum:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"}) / sum(job:loki_request_duration_seconds_count:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})","format":"time_series","intervalFactor":2,"legendFormat":"Average","refId":"C","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Latency","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ms","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Write Path","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{"1xx":"#EAB839","2xx":"#7EB26D","3xx":"#6ED0E0","4xx":"#EF843C","5xx":"#E24D42","error":"#E24D42","success":"#7EB26D"},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":3,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by (status) (\n label_replace(label_replace(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", operation=\"WRITE\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n","format":"time_series","intervalFactor":2,"legendFormat":"{{status}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"QPS","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":4,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", operation=\"WRITE\"}[$__rate_interval])) by (le)) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"99th Percentile","refId":"A","step":10},{"expr":"histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", operation=\"WRITE\"}[$__rate_interval])) by (le)) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"50th Percentile","refId":"B","step":10},{"expr":"sum(rate(loki_boltdb_shipper_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", operation=\"WRITE\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", operation=\"WRITE\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"Average","refId":"C","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Latency","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ms","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"BoltDB Shipper","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["loki"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(loki_build_info, cluster)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"namespace","multi":false,"name":"namespace","options":[],"query":"label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"utc","title":"Loki / Writes","uid":"writes","version":0} \ No newline at end of file diff --git a/monitor/loki/apps_v1_StatefulSet_loki.yaml b/monitor/loki/apps_v1_StatefulSet_loki.yaml deleted file mode 100644 index 3bced30..0000000 --- a/monitor/loki/apps_v1_StatefulSet_loki.yaml +++ /dev/null @@ -1,124 +0,0 @@ -# Source: loki/templates/single-binary/statefulset.yaml -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: loki - namespace: vynil-monitor - labels: - helm.sh/chart: loki-5.41.8 - app.kubernetes.io/name: loki - app.kubernetes.io/instance: loki - app.kubernetes.io/version: "2.9.3" - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: single-binary - app.kubernetes.io/part-of: memberlist -spec: - replicas: 1 - podManagementPolicy: Parallel - updateStrategy: - rollingUpdate: - partition: 0 - serviceName: loki-headless - revisionHistoryLimit: 10 - - persistentVolumeClaimRetentionPolicy: - whenDeleted: Delete - whenScaled: Delete - selector: - matchLabels: - app.kubernetes.io/name: loki - app.kubernetes.io/instance: loki - app.kubernetes.io/component: single-binary - template: - metadata: - annotations: - checksum/config: f0b5fe7288abac6838f61aacfebeba1a01d5cf3d391971062c58797d2f0ea40f - labels: - app.kubernetes.io/name: loki - app.kubernetes.io/instance: loki - app.kubernetes.io/component: single-binary - app.kubernetes.io/part-of: memberlist - spec: - serviceAccountName: loki - automountServiceAccountToken: true - enableServiceLinks: true - - securityContext: - fsGroup: 10001 - runAsGroup: 10001 - runAsNonRoot: true - runAsUser: 10001 - terminationGracePeriodSeconds: 30 - containers: - - name: loki - image: docker.io/grafana/loki:2.9.3 - imagePullPolicy: IfNotPresent - args: - - -config.file=/etc/loki/config/config.yaml - - -target=all - ports: - - name: http-metrics - containerPort: 3100 - protocol: TCP - - name: grpc - containerPort: 9095 - protocol: TCP - - name: http-memberlist - containerPort: 7946 - protocol: TCP - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - readinessProbe: - httpGet: - path: /ready - port: http-metrics - initialDelaySeconds: 30 - timeoutSeconds: 1 - volumeMounts: - - name: tmp - mountPath: /tmp - - name: config - mountPath: /etc/loki/config - - name: runtime-config - mountPath: /etc/loki/runtime-config - - name: storage - mountPath: /var/loki - resources: - {} - affinity: - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchLabels: - app.kubernetes.io/name: loki - app.kubernetes.io/instance: loki - app.kubernetes.io/component: single-binary - topologyKey: kubernetes.io/hostname - - volumes: - - name: tmp - emptyDir: {} - - name: config - configMap: - name: loki - items: - - key: "config.yaml" - path: "config.yaml" - - name: runtime-config - configMap: - name: loki-runtime - volumeClaimTemplates: - - apiVersion: v1 - kind: PersistentVolumeClaim - metadata: - name: storage - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: "17Gi" \ No newline at end of file diff --git a/monitor/loki/config.tf b/monitor/loki/config.tf deleted file mode 100644 index de79b58..0000000 --- a/monitor/loki/config.tf +++ /dev/null @@ -1,91 +0,0 @@ -resource "kubectl_manifest" "datasource" { - yaml_body = <<-EOF - apiVersion: v1 - kind: ConfigMap - metadata: - name: loki-datasource - namespace: "${var.namespace}" - labels: ${jsonencode(merge(local.common-labels, {"grafana_datasource" = "1"}))} - data: - loki-datasource.yaml: |- - apiVersion: 1 - datasources: - - name: Loki - type: loki - access: proxy - url: "http://loki.${var.namespace}.svc:3100" - version: 1 - isDefault: false - jsonData: - {} - EOF -} -resource "kubectl_manifest" "config" { - yaml_body = <<-EOF - apiVersion: v1 - kind: ConfigMap - metadata: - name: loki - namespace: "${var.namespace}" - labels: ${jsonencode(local.common-labels)} - data: - config.yaml: | - auth_enabled: false - common: - compactor_address: 'loki' - path_prefix: /var/loki - replication_factor: 1 - storage: - filesystem: - chunks_directory: /var/loki/chunks - rules_directory: /var/loki/rules - frontend: - scheduler_address: "" - frontend_worker: - scheduler_address: "" - index_gateway: - mode: ring - limits_config: - max_cache_freshness_per_query: 10m - reject_old_samples: true - reject_old_samples_max_age: 168h - split_queries_by_interval: 15m - memberlist: - join_members: - - loki-memberlist - query_range: - align_queries_with_step: true - ruler: - storage: - type: local - local: - directory: /tmp/rules - rule_path: /tmp/scratch - alertmanager_url: http://${var.alertmanager}:9093 - ring: - kvstore: - store: inmemory - enable_api: true - runtime_config: - file: /etc/loki/runtime-config/runtime-config.yaml - schema_config: - configs: - - from: "2022-01-11" - index: - period: 24h - prefix: loki_index_ - object_store: filesystem - schema: v12 - store: boltdb-shipper - server: - grpc_listen_port: 9095 - http_listen_port: 3100 - storage_config: - hedging: - at: 250ms - max_per_second: 20 - up_to: 3 - tracing: - enabled: false - EOF -} diff --git a/monitor/loki/datas.tf b/monitor/loki/datas.tf deleted file mode 100644 index 32c9304..0000000 --- a/monitor/loki/datas.tf +++ /dev/null @@ -1,77 +0,0 @@ -locals { - common-labels = { - "vynil.solidite.fr/owner-name" = var.instance - "vynil.solidite.fr/owner-namespace" = var.namespace - "vynil.solidite.fr/owner-category" = var.category - "vynil.solidite.fr/owner-component" = var.component - "app.kubernetes.io/managed-by" = "vynil" - "app.kubernetes.io/instance" = var.instance - } - pvc_spec = merge({ - "accessModes" = [var.storage.volume.accessMode] - "volumeMode" = var.storage.volume.type - "resources" = { - "requests" = { - "storage" = "${var.storage.volume.size}" - } - } - }, var.storage.volume.class != "" ?{ - "storageClassName" = var.storage.volume.class - }:{}) - rb-patch = <<-EOF - - op: replace - path: /subjects/0/namespace - value: "${var.namespace}" - EOF -} - -data "kustomization_overlay" "data" { - common_labels = local.common-labels - namespace = var.namespace - resources = [for file in fileset(path.module, "*.yaml"): file if file != "index.yaml" && length(regexall("ClusterRole",file))<1] - images { - name = "docker.io/grafana/loki" - new_name = "${var.images.loki.registry}/${var.images.loki.repository}" - new_tag = "${var.images.loki.tag}" - } - patches { - target { - kind = "ServiceMonitor" - name = "loki" - } - patch = <<-EOF - - op: replace - path: /spec/endpoints/0/relabelings/0/replacement - value: "${var.namespace}/$1" - EOF - } - patches { - target { - kind = "StatefulSet" - name = "loki" - } - patch = <<-EOF - apiVersion: apps/v1 - kind: StatefulSet - metadata: - name: loki - spec: - replicas: 1 - template: - spec: - containers: - - name: loki - imagePullPolicy: ${var.images.loki.pullPolicy} - volumeClaimTemplates: - - apiVersion: v1 - kind: PersistentVolumeClaim - metadata: - name: storage - annotations: - k8up.io/backup: "true" - spec: ${jsonencode(local.pvc_spec)} - EOF - } -} - - diff --git a/monitor/loki/index.yaml b/monitor/loki/index.yaml deleted file mode 100644 index 5b84219..0000000 --- a/monitor/loki/index.yaml +++ /dev/null @@ -1,105 +0,0 @@ ---- -apiVersion: vinyl.solidite.fr/v1beta1 -kind: Component -category: monitor -metadata: - name: loki - description: null -options: - alertmanager: - default: alertmanager-alertmanager - examples: - - alertmanager-alertmanager - type: string - images: - default: - loki: - pullPolicy: IfNotPresent - registry: docker.io - repository: grafana/loki - tag: 2.9.3 - examples: - - loki: - pullPolicy: IfNotPresent - registry: docker.io - repository: grafana/loki - tag: 2.9.3 - properties: - loki: - default: - pullPolicy: IfNotPresent - registry: docker.io - repository: grafana/loki - tag: 2.9.3 - properties: - pullPolicy: - default: IfNotPresent - enum: - - Always - - Never - - IfNotPresent - type: string - registry: - default: docker.io - type: string - repository: - default: grafana/loki - type: string - tag: - default: 2.9.3 - type: string - type: object - type: object - storage: - default: - volume: - accessMode: ReadWriteOnce - class: '' - size: 10Gi - type: Filesystem - description: Configure this app storage - examples: - - volume: - accessMode: ReadWriteOnce - class: '' - size: 10Gi - type: Filesystem - properties: - volume: - default: - accessMode: ReadWriteOnce - class: '' - size: 10Gi - type: Filesystem - properties: - accessMode: - default: ReadWriteOnce - enum: - - ReadWriteOnce - - ReadOnlyMany - - ReadWriteMany - type: string - class: - default: '' - type: string - size: - default: 10Gi - type: string - type: - default: Filesystem - enum: - - Filesystem - - Block - type: string - type: object - type: object -dependencies: [] -providers: - kubernetes: true - authentik: null - kubectl: true - postgresql: null - restapi: null - http: null - gitea: null -tfaddtype: null diff --git a/monitor/loki/monitoring.coreos.com_v1_PrometheusRule_loki-loki-alerts.yaml b/monitor/loki/monitoring.coreos.com_v1_PrometheusRule_loki-loki-alerts.yaml deleted file mode 100644 index 11d5948..0000000 --- a/monitor/loki/monitoring.coreos.com_v1_PrometheusRule_loki-loki-alerts.yaml +++ /dev/null @@ -1,65 +0,0 @@ -# Source: loki/templates/monitoring/loki-alerts.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - labels: - helm.sh/chart: loki-5.41.8 - app.kubernetes.io/name: loki - app.kubernetes.io/instance: loki - app.kubernetes.io/version: "2.9.3" - app.kubernetes.io/managed-by: Helm - name: loki-loki-alerts - namespace: vynil-monitor -spec: - groups: - - name: loki_alerts - rules: - - alert: LokiRequestErrors - annotations: - message: | - {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors. - expr: | - 100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[2m])) by (namespace, job, route) - / - sum(rate(loki_request_duration_seconds_count[2m])) by (namespace, job, route) - > 10 - for: 15m - labels: - severity: critical - - alert: LokiRequestPanics - annotations: - message: | - {{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% increase of panics. - expr: | - sum(increase(loki_panic_total[10m])) by (namespace, job) > 0 - labels: - severity: critical - - alert: LokiRequestLatency - annotations: - message: | - {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency. - expr: | - namespace_job_route:loki_request_duration_seconds:99quantile{route!~"(?i).*tail.*"} > 1 - for: 15m - labels: - severity: critical - - alert: LokiTooManyCompactorsRunning - annotations: - message: | - {{ $labels.cluster }} {{ $labels.namespace }} has had {{ printf "%.0f" $value }} compactors running for more than 5m. Only one compactor should run at a time. - expr: | - sum(loki_boltdb_shipper_compactor_running) by (namespace, cluster) > 1 - for: 5m - labels: - severity: warning - - name: loki_canaries_alerts - rules: - - alert: LokiCanaryLatency - annotations: - message: | - {{ $labels.job }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency. - expr: | - histogram_quantile(0.99, sum(rate(loki_canary_response_latency_seconds_bucket[5m])) by (le, namespace, job)) > 5 - for: 15m - labels: - severity: warning \ No newline at end of file diff --git a/monitor/loki/monitoring.coreos.com_v1_PrometheusRule_loki-loki-rules.yaml b/monitor/loki/monitoring.coreos.com_v1_PrometheusRule_loki-loki-rules.yaml deleted file mode 100644 index e343977..0000000 --- a/monitor/loki/monitoring.coreos.com_v1_PrometheusRule_loki-loki-rules.yaml +++ /dev/null @@ -1,98 +0,0 @@ -# Source: loki/templates/monitoring/loki-rules.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - labels: - helm.sh/chart: loki-5.41.8 - app.kubernetes.io/name: loki - app.kubernetes.io/instance: loki - app.kubernetes.io/version: "2.9.3" - app.kubernetes.io/managed-by: Helm - name: loki-loki-rules - namespace: vynil-monitor -spec: - groups: - - name: loki_rules - rules: - - expr: histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m])) - by (le, job)) - labels: - cluster: loki - record: job:loki_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m])) - by (le, job)) - labels: - cluster: loki - record: job:loki_request_duration_seconds:50quantile - - expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (job) / sum(rate(loki_request_duration_seconds_count[1m])) - by (job) - labels: - cluster: loki - record: job:loki_request_duration_seconds:avg - - expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, job) - labels: - cluster: loki - record: job:loki_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (job) - labels: - cluster: loki - record: job:loki_request_duration_seconds_sum:sum_rate - - expr: sum(rate(loki_request_duration_seconds_count[1m])) by (job) - labels: - cluster: loki - record: job:loki_request_duration_seconds_count:sum_rate - - expr: histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m])) - by (le, job, route)) - labels: - cluster: loki - record: job_route:loki_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m])) - by (le, job, route)) - labels: - cluster: loki - record: job_route:loki_request_duration_seconds:50quantile - - expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (job, route) / sum(rate(loki_request_duration_seconds_count[1m])) - by (job, route) - labels: - cluster: loki - record: job_route:loki_request_duration_seconds:avg - - expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, job, route) - labels: - cluster: loki - record: job_route:loki_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (job, route) - labels: - cluster: loki - record: job_route:loki_request_duration_seconds_sum:sum_rate - - expr: sum(rate(loki_request_duration_seconds_count[1m])) by (job, route) - labels: - cluster: loki - record: job_route:loki_request_duration_seconds_count:sum_rate - - expr: histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m])) - by (le, namespace, job, route)) - labels: - cluster: loki - record: namespace_job_route:loki_request_duration_seconds:99quantile - - expr: histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m])) - by (le, namespace, job, route)) - labels: - cluster: loki - record: namespace_job_route:loki_request_duration_seconds:50quantile - - expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (namespace, job, route) - / sum(rate(loki_request_duration_seconds_count[1m])) by (namespace, job, route) - labels: - cluster: loki - record: namespace_job_route:loki_request_duration_seconds:avg - - expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, namespace, job, - route) - labels: - cluster: loki - record: namespace_job_route:loki_request_duration_seconds_bucket:sum_rate - - expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (namespace, job, route) - labels: - cluster: loki - record: namespace_job_route:loki_request_duration_seconds_sum:sum_rate - - expr: sum(rate(loki_request_duration_seconds_count[1m])) by (namespace, job, route) - labels: - cluster: loki - record: namespace_job_route:loki_request_duration_seconds_count:sum_rate \ No newline at end of file diff --git a/monitor/loki/monitoring.coreos.com_v1_ServiceMonitor_loki.yaml b/monitor/loki/monitoring.coreos.com_v1_ServiceMonitor_loki.yaml deleted file mode 100644 index 013f2c8..0000000 --- a/monitor/loki/monitoring.coreos.com_v1_ServiceMonitor_loki.yaml +++ /dev/null @@ -1,35 +0,0 @@ -# Source: loki/templates/monitoring/servicemonitor.yaml -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: loki - namespace: vynil-monitor - labels: - helm.sh/chart: loki-5.41.8 - app.kubernetes.io/name: loki - app.kubernetes.io/instance: loki - app.kubernetes.io/version: "2.9.3" - app.kubernetes.io/managed-by: Helm -spec: - selector: - matchLabels: - app.kubernetes.io/name: loki - app.kubernetes.io/instance: loki - matchExpressions: - - key: prometheus.io/service-monitor - operator: NotIn - values: - - "false" - endpoints: - - port: http-metrics - path: /metrics - interval: 15s - relabelings: - - sourceLabels: [job] - action: replace - replacement: "vynil-monitor/$1" - targetLabel: job - - action: replace - replacement: "loki" - targetLabel: cluster - scheme: http \ No newline at end of file diff --git a/monitor/loki/v1_ConfigMap_loki-runtime.yaml b/monitor/loki/v1_ConfigMap_loki-runtime.yaml deleted file mode 100644 index 6a0f623..0000000 --- a/monitor/loki/v1_ConfigMap_loki-runtime.yaml +++ /dev/null @@ -1,15 +0,0 @@ -# Source: loki/templates/runtime-configmap.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: loki-runtime - namespace: vynil-monitor - labels: - helm.sh/chart: loki-5.41.8 - app.kubernetes.io/name: loki - app.kubernetes.io/instance: loki - app.kubernetes.io/version: "2.9.3" - app.kubernetes.io/managed-by: Helm -data: - runtime-config.yaml: | - {} \ No newline at end of file diff --git a/monitor/loki/v1_ServiceAccount_loki.yaml b/monitor/loki/v1_ServiceAccount_loki.yaml deleted file mode 100644 index 78ae8a6..0000000 --- a/monitor/loki/v1_ServiceAccount_loki.yaml +++ /dev/null @@ -1,14 +0,0 @@ ---- -# Source: loki/templates/serviceaccount.yaml -apiVersion: v1 -kind: ServiceAccount -metadata: - name: loki - namespace: vynil-monitor - labels: - helm.sh/chart: loki-5.41.8 - app.kubernetes.io/name: loki - app.kubernetes.io/instance: loki - app.kubernetes.io/version: "2.9.3" - app.kubernetes.io/managed-by: Helm -automountServiceAccountToken: true \ No newline at end of file diff --git a/monitor/loki/v1_Service_loki-headless.yaml b/monitor/loki/v1_Service_loki-headless.yaml deleted file mode 100644 index 1b1fbb7..0000000 --- a/monitor/loki/v1_Service_loki-headless.yaml +++ /dev/null @@ -1,25 +0,0 @@ -# Source: loki/templates/single-binary/service-headless.yaml -apiVersion: v1 -kind: Service -metadata: - name: loki-headless - namespace: vynil-monitor - labels: - helm.sh/chart: loki-5.41.8 - app.kubernetes.io/name: loki - app.kubernetes.io/instance: loki - app.kubernetes.io/version: "2.9.3" - app.kubernetes.io/managed-by: Helm - variant: headless - prometheus.io/service-monitor: "false" - annotations: -spec: - clusterIP: None - ports: - - name: http-metrics - port: 3100 - targetPort: http-metrics - protocol: TCP - selector: - app.kubernetes.io/name: loki - app.kubernetes.io/instance: loki \ No newline at end of file diff --git a/monitor/loki/v1_Service_loki-memberlist.yaml b/monitor/loki/v1_Service_loki-memberlist.yaml deleted file mode 100644 index 188c14d..0000000 --- a/monitor/loki/v1_Service_loki-memberlist.yaml +++ /dev/null @@ -1,24 +0,0 @@ -# Source: loki/templates/service-memberlist.yaml -apiVersion: v1 -kind: Service -metadata: - name: loki-memberlist - namespace: vynil-monitor - labels: - helm.sh/chart: loki-5.41.8 - app.kubernetes.io/name: loki - app.kubernetes.io/instance: loki - app.kubernetes.io/version: "2.9.3" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - clusterIP: None - ports: - - name: tcp - port: 7946 - targetPort: http-memberlist - protocol: TCP - selector: - app.kubernetes.io/name: loki - app.kubernetes.io/instance: loki - app.kubernetes.io/part-of: memberlist \ No newline at end of file diff --git a/monitor/loki/v1_Service_loki.yaml b/monitor/loki/v1_Service_loki.yaml deleted file mode 100644 index 4d885d4..0000000 --- a/monitor/loki/v1_Service_loki.yaml +++ /dev/null @@ -1,28 +0,0 @@ -# Source: loki/templates/single-binary/service.yaml -apiVersion: v1 -kind: Service -metadata: - name: loki - namespace: vynil-monitor - labels: - helm.sh/chart: loki-5.41.8 - app.kubernetes.io/name: loki - app.kubernetes.io/instance: loki - app.kubernetes.io/version: "2.9.3" - app.kubernetes.io/managed-by: Helm - annotations: -spec: - type: ClusterIP - ports: - - name: http-metrics - port: 3100 - targetPort: http-metrics - protocol: TCP - - name: grpc - port: 9095 - targetPort: grpc - protocol: TCP - selector: - app.kubernetes.io/name: loki - app.kubernetes.io/instance: loki - app.kubernetes.io/component: single-binary \ No newline at end of file diff --git a/monitor/monitor-control-plan/index.yaml b/monitor/monitor-control-plan/index.yaml deleted file mode 100644 index c38c83e..0000000 --- a/monitor/monitor-control-plan/index.yaml +++ /dev/null @@ -1,23 +0,0 @@ ---- -apiVersion: vinyl.solidite.fr/v1beta1 -kind: Component -category: monitor -metadata: - name: monitor-control-plan - description: null -options: - useless: - default: true - examples: - - true - type: boolean -dependencies: [] -providers: - kubernetes: true - authentik: null - kubectl: true - postgresql: null - restapi: null - http: null - gitea: null -tfaddtype: null diff --git a/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-etcd.yaml b/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-etcd.yaml deleted file mode 100644 index da7bd00..0000000 --- a/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-etcd.yaml +++ /dev/null @@ -1,167 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/etcd.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: prometheus-community-kube-etcd - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -spec: - groups: - - name: etcd - rules: - - alert: etcdMembersDown - annotations: - description: 'etcd cluster "{{ $labels.job }}": members are down ({{ $value }}).' - summary: etcd cluster members are down. - expr: |- - max without (endpoint) ( - sum without (instance) (up{job=~".*etcd.*"} == bool 0) - or - count without (To) ( - sum without (instance) (rate(etcd_network_peer_sent_failures_total{job=~".*etcd.*"}[120s])) > 0.01 - ) - ) - > 0 - for: 10m - labels: - severity: critical - - alert: etcdInsufficientMembers - annotations: - description: 'etcd cluster "{{ $labels.job }}": insufficient members ({{ $value }}).' - summary: etcd cluster has insufficient number of members. - expr: sum(up{job=~".*etcd.*"} == bool 1) without (instance) < ((count(up{job=~".*etcd.*"}) without (instance) + 1) / 2) - for: 3m - labels: - severity: critical - - alert: etcdNoLeader - annotations: - description: 'etcd cluster "{{ $labels.job }}": member {{ $labels.instance }} has no leader.' - summary: etcd cluster has no leader. - expr: etcd_server_has_leader{job=~".*etcd.*"} == 0 - for: 1m - labels: - severity: critical - - alert: etcdHighNumberOfLeaderChanges - annotations: - description: 'etcd cluster "{{ $labels.job }}": {{ $value }} leader changes within the last 15 minutes. Frequent elections may be a sign of insufficient resources, high network latency, or disruptions by other components and should be investigated.' - summary: etcd cluster has high number of leader changes. - expr: increase((max without (instance) (etcd_server_leader_changes_seen_total{job=~".*etcd.*"}) or 0*absent(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}))[15m:1m]) >= 4 - for: 5m - labels: - severity: warning - - alert: etcdHighNumberOfFailedGRPCRequests - annotations: - description: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.' - summary: etcd cluster has high number of failed grpc requests. - expr: |- - 100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[5m])) without (grpc_type, grpc_code) - / - sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) without (grpc_type, grpc_code) - > 1 - for: 10m - labels: - severity: warning - - alert: etcdHighNumberOfFailedGRPCRequests - annotations: - description: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.' - summary: etcd cluster has high number of failed grpc requests. - expr: |- - 100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[5m])) without (grpc_type, grpc_code) - / - sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) without (grpc_type, grpc_code) - > 5 - for: 5m - labels: - severity: critical - - alert: etcdGRPCRequestsSlow - annotations: - description: 'etcd cluster "{{ $labels.job }}": 99th percentile of gRPC requests is {{ $value }}s on etcd instance {{ $labels.instance }} for {{ $labels.grpc_method }} method.' - summary: etcd grpc requests are slow - expr: |- - histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~".*etcd.*", grpc_method!="Defragment", grpc_type="unary"}[5m])) without(grpc_type)) - > 0.15 - for: 10m - labels: - severity: critical - - alert: etcdMemberCommunicationSlow - annotations: - description: 'etcd cluster "{{ $labels.job }}": member communication with {{ $labels.To }} is taking {{ $value }}s on etcd instance {{ $labels.instance }}.' - summary: etcd cluster member communication is slow. - expr: |- - histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~".*etcd.*"}[5m])) - > 0.15 - for: 10m - labels: - severity: warning - - alert: etcdHighNumberOfFailedProposals - annotations: - description: 'etcd cluster "{{ $labels.job }}": {{ $value }} proposal failures within the last 30 minutes on etcd instance {{ $labels.instance }}.' - summary: etcd cluster has high number of proposal failures. - expr: rate(etcd_server_proposals_failed_total{job=~".*etcd.*"}[15m]) > 5 - for: 15m - labels: - severity: warning - - alert: etcdHighFsyncDurations - annotations: - description: 'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations are {{ $value }}s on etcd instance {{ $labels.instance }}.' - summary: etcd cluster 99th percentile fsync durations are too high. - expr: |- - histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m])) - > 0.5 - for: 10m - labels: - severity: warning - - alert: etcdHighFsyncDurations - annotations: - description: 'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations are {{ $value }}s on etcd instance {{ $labels.instance }}.' - summary: etcd cluster 99th percentile fsync durations are too high. - expr: |- - histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m])) - > 1 - for: 10m - labels: - severity: critical - - alert: etcdHighCommitDurations - annotations: - description: 'etcd cluster "{{ $labels.job }}": 99th percentile commit durations {{ $value }}s on etcd instance {{ $labels.instance }}.' - summary: etcd cluster 99th percentile commit durations are too high. - expr: |- - histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~".*etcd.*"}[5m])) - > 0.25 - for: 10m - labels: - severity: warning - - alert: etcdDatabaseQuotaLowSpace - annotations: - description: 'etcd cluster "{{ $labels.job }}": database size exceeds the defined quota on etcd instance {{ $labels.instance }}, please defrag or increase the quota as the writes to etcd will be disabled when it is full.' - summary: etcd cluster database is running full. - expr: (last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m]) / last_over_time(etcd_server_quota_backend_bytes{job=~".*etcd.*"}[5m]))*100 > 95 - for: 10m - labels: - severity: critical - - alert: etcdExcessiveDatabaseGrowth - annotations: - description: 'etcd cluster "{{ $labels.job }}": Predicting running out of disk space in the next four hours, based on write observations within the past four hours on etcd instance {{ $labels.instance }}, please check as it might be disruptive.' - summary: etcd cluster database growing very fast. - expr: predict_linear(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[4h], 4*60*60) > etcd_server_quota_backend_bytes{job=~".*etcd.*"} - for: 10m - labels: - severity: warning - - alert: etcdDatabaseHighFragmentationRatio - annotations: - description: 'etcd cluster "{{ $labels.job }}": database size in use on instance {{ $labels.instance }} is {{ $value | humanizePercentage }} of the actual allocated disk space, please run defragmentation (e.g. etcdctl defrag) to retrieve the unused fragmented disk space.' - runbook_url: https://etcd.io/docs/v3.5/op-guide/maintenance/#defragmentation - summary: etcd database size in use is less than 50% of the actual allocated storage. - expr: (last_over_time(etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"}[5m]) / last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m])) < 0.5 and etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"} > 104857600 - for: 10m - labels: - severity: warning \ No newline at end of file diff --git a/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-apiserver-availability.rules.yaml b/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-apiserver-availability.rules.yaml deleted file mode 100644 index a5c172e..0000000 --- a/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-apiserver-availability.rules.yaml +++ /dev/null @@ -1,129 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-availability.rules.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: prometheus-community-kube-kube-apiserver-availability.rules - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -spec: - groups: - - interval: 3m - name: kube-apiserver-availability.rules - rules: - - expr: avg_over_time(code_verb:apiserver_request_total:increase1h[30d]) * 24 * 30 - record: code_verb:apiserver_request_total:increase30d - - expr: sum by (cluster, code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"}) - labels: - verb: read - record: code:apiserver_request_total:increase30d - - expr: sum by (cluster, code) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) - labels: - verb: write - record: code:apiserver_request_total:increase30d - - expr: sum by (cluster, verb, scope) (increase(apiserver_request_sli_duration_seconds_count{job="apiserver"}[1h])) - record: cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase1h - - expr: sum by (cluster, verb, scope) (avg_over_time(cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase1h[30d]) * 24 * 30) - record: cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d - - expr: sum by (cluster, verb, scope, le) (increase(apiserver_request_sli_duration_seconds_bucket[1h])) - record: cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase1h - - expr: sum by (cluster, verb, scope, le) (avg_over_time(cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase1h[30d]) * 24 * 30) - record: cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d - - expr: |- - 1 - ( - ( - # write too slow - sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) - - - sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"POST|PUT|PATCH|DELETE",le="1"}) - ) + - ( - # read too slow - sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"LIST|GET"}) - - - ( - ( - sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope=~"resource|",le="1"}) - or - vector(0) - ) - + - sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="namespace",le="5"}) - + - sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="cluster",le="30"}) - ) - ) + - # errors - sum by (cluster) (code:apiserver_request_total:increase30d{code=~"5.."} or vector(0)) - ) - / - sum by (cluster) (code:apiserver_request_total:increase30d) - labels: - verb: all - record: apiserver_request:availability30d - - expr: |- - 1 - ( - sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"LIST|GET"}) - - - ( - # too slow - ( - sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope=~"resource|",le="1"}) - or - vector(0) - ) - + - sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="namespace",le="5"}) - + - sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="cluster",le="30"}) - ) - + - # errors - sum by (cluster) (code:apiserver_request_total:increase30d{verb="read",code=~"5.."} or vector(0)) - ) - / - sum by (cluster) (code:apiserver_request_total:increase30d{verb="read"}) - labels: - verb: read - record: apiserver_request:availability30d - - expr: |- - 1 - ( - ( - # too slow - sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) - - - sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"POST|PUT|PATCH|DELETE",le="1"}) - ) - + - # errors - sum by (cluster) (code:apiserver_request_total:increase30d{verb="write",code=~"5.."} or vector(0)) - ) - / - sum by (cluster) (code:apiserver_request_total:increase30d{verb="write"}) - labels: - verb: write - record: apiserver_request:availability30d - - expr: sum by (cluster,code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m])) - labels: - verb: read - record: code_resource:apiserver_request_total:rate5m - - expr: sum by (cluster,code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) - labels: - verb: write - record: code_resource:apiserver_request_total:rate5m - - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"2.."}[1h])) - record: code_verb:apiserver_request_total:increase1h - - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"3.."}[1h])) - record: code_verb:apiserver_request_total:increase1h - - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"4.."}[1h])) - record: code_verb:apiserver_request_total:increase1h - - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"5.."}[1h])) - record: code_verb:apiserver_request_total:increase1h \ No newline at end of file diff --git a/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-apiserver-burnrate.rules.yaml b/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-apiserver-burnrate.rules.yaml deleted file mode 100644 index 12a7204..0000000 --- a/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-apiserver-burnrate.rules.yaml +++ /dev/null @@ -1,321 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-burnrate.rules.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: prometheus-community-kube-kube-apiserver-burnrate.rules - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -spec: - groups: - - name: kube-apiserver-burnrate.rules - rules: - - expr: |- - ( - ( - # too slow - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[1d])) - - - ( - ( - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[1d])) - or - vector(0) - ) - + - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[1d])) - + - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[1d])) - ) - ) - + - # errors - sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1d])) - ) - / - sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1d])) - labels: - verb: read - record: apiserver_request:burnrate1d - - expr: |- - ( - ( - # too slow - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[1h])) - - - ( - ( - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[1h])) - or - vector(0) - ) - + - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[1h])) - + - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[1h])) - ) - ) - + - # errors - sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1h])) - ) - / - sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1h])) - labels: - verb: read - record: apiserver_request:burnrate1h - - expr: |- - ( - ( - # too slow - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[2h])) - - - ( - ( - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[2h])) - or - vector(0) - ) - + - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[2h])) - + - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[2h])) - ) - ) - + - # errors - sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[2h])) - ) - / - sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[2h])) - labels: - verb: read - record: apiserver_request:burnrate2h - - expr: |- - ( - ( - # too slow - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[30m])) - - - ( - ( - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[30m])) - or - vector(0) - ) - + - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[30m])) - + - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[30m])) - ) - ) - + - # errors - sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[30m])) - ) - / - sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[30m])) - labels: - verb: read - record: apiserver_request:burnrate30m - - expr: |- - ( - ( - # too slow - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[3d])) - - - ( - ( - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[3d])) - or - vector(0) - ) - + - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[3d])) - + - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[3d])) - ) - ) - + - # errors - sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[3d])) - ) - / - sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[3d])) - labels: - verb: read - record: apiserver_request:burnrate3d - - expr: |- - ( - ( - # too slow - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[5m])) - - - ( - ( - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[5m])) - or - vector(0) - ) - + - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[5m])) - + - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[5m])) - ) - ) - + - # errors - sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[5m])) - ) - / - sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m])) - labels: - verb: read - record: apiserver_request:burnrate5m - - expr: |- - ( - ( - # too slow - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[6h])) - - - ( - ( - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[6h])) - or - vector(0) - ) - + - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[6h])) - + - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[6h])) - ) - ) - + - # errors - sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[6h])) - ) - / - sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[6h])) - labels: - verb: read - record: apiserver_request:burnrate6h - - expr: |- - ( - ( - # too slow - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[1d])) - - - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[1d])) - ) - + - sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1d])) - ) - / - sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d])) - labels: - verb: write - record: apiserver_request:burnrate1d - - expr: |- - ( - ( - # too slow - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[1h])) - - - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[1h])) - ) - + - sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1h])) - ) - / - sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h])) - labels: - verb: write - record: apiserver_request:burnrate1h - - expr: |- - ( - ( - # too slow - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[2h])) - - - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[2h])) - ) - + - sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[2h])) - ) - / - sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h])) - labels: - verb: write - record: apiserver_request:burnrate2h - - expr: |- - ( - ( - # too slow - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[30m])) - - - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[30m])) - ) - + - sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[30m])) - ) - / - sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m])) - labels: - verb: write - record: apiserver_request:burnrate30m - - expr: |- - ( - ( - # too slow - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[3d])) - - - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[3d])) - ) - + - sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[3d])) - ) - / - sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d])) - labels: - verb: write - record: apiserver_request:burnrate3d - - expr: |- - ( - ( - # too slow - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[5m])) - - - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[5m])) - ) - + - sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[5m])) - ) - / - sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) - labels: - verb: write - record: apiserver_request:burnrate5m - - expr: |- - ( - ( - # too slow - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[6h])) - - - sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[6h])) - ) - + - sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[6h])) - ) - / - sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h])) - labels: - verb: write - record: apiserver_request:burnrate6h \ No newline at end of file diff --git a/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-apiserver-histogram.rules.yaml b/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-apiserver-histogram.rules.yaml deleted file mode 100644 index b734489..0000000 --- a/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-apiserver-histogram.rules.yaml +++ /dev/null @@ -1,30 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-histogram.rules.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: prometheus-community-kube-kube-apiserver-histogram.rules - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -spec: - groups: - - name: kube-apiserver-histogram.rules - rules: - - expr: histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[5m]))) > 0 - labels: - quantile: '0.99' - verb: read - record: cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile - - expr: histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[5m]))) > 0 - labels: - quantile: '0.99' - verb: write - record: cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile \ No newline at end of file diff --git a/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-apiserver-slos.yaml b/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-apiserver-slos.yaml deleted file mode 100644 index f96d3c9..0000000 --- a/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-apiserver-slos.yaml +++ /dev/null @@ -1,76 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-slos.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: prometheus-community-kube-kube-apiserver-slos - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -spec: - groups: - - name: kube-apiserver-slos - rules: - - alert: KubeAPIErrorBudgetBurn - annotations: - description: The API server is burning too much error budget. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn - summary: The API server is burning too much error budget. - expr: |- - sum(apiserver_request:burnrate1h) > (14.40 * 0.01000) - and - sum(apiserver_request:burnrate5m) > (14.40 * 0.01000) - for: 2m - labels: - long: 1h - severity: critical - short: 5m - - alert: KubeAPIErrorBudgetBurn - annotations: - description: The API server is burning too much error budget. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn - summary: The API server is burning too much error budget. - expr: |- - sum(apiserver_request:burnrate6h) > (6.00 * 0.01000) - and - sum(apiserver_request:burnrate30m) > (6.00 * 0.01000) - for: 15m - labels: - long: 6h - severity: critical - short: 30m - - alert: KubeAPIErrorBudgetBurn - annotations: - description: The API server is burning too much error budget. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn - summary: The API server is burning too much error budget. - expr: |- - sum(apiserver_request:burnrate1d) > (3.00 * 0.01000) - and - sum(apiserver_request:burnrate2h) > (3.00 * 0.01000) - for: 1h - labels: - long: 1d - severity: warning - short: 2h - - alert: KubeAPIErrorBudgetBurn - annotations: - description: The API server is burning too much error budget. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn - summary: The API server is burning too much error budget. - expr: |- - sum(apiserver_request:burnrate3d) > (1.00 * 0.01000) - and - sum(apiserver_request:burnrate6h) > (1.00 * 0.01000) - for: 3h - labels: - long: 3d - severity: warning - short: 6h \ No newline at end of file diff --git a/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-scheduler.rules.yaml b/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-scheduler.rules.yaml deleted file mode 100644 index 23306ff..0000000 --- a/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kube-scheduler.rules.yaml +++ /dev/null @@ -1,56 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kube-scheduler.rules.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: prometheus-community-kube-kube-scheduler.rules - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -spec: - groups: - - name: kube-scheduler.rules - rules: - - expr: histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) - labels: - quantile: '0.99' - record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile - - expr: histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) - labels: - quantile: '0.99' - record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile - - expr: histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) - labels: - quantile: '0.99' - record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile - - expr: histogram_quantile(0.9, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) - labels: - quantile: '0.9' - record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile - - expr: histogram_quantile(0.9, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) - labels: - quantile: '0.9' - record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile - - expr: histogram_quantile(0.9, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) - labels: - quantile: '0.9' - record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile - - expr: histogram_quantile(0.5, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) - labels: - quantile: '0.5' - record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile - - expr: histogram_quantile(0.5, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) - labels: - quantile: '0.5' - record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile - - expr: histogram_quantile(0.5, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) - labels: - quantile: '0.5' - record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile \ No newline at end of file diff --git a/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-controller-manager.yaml b/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-controller-manager.yaml deleted file mode 100644 index bdb1581..0000000 --- a/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-controller-manager.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-controller-manager.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: prometheus-community-kube-kubernetes-system-controller-manager - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -spec: - groups: - - name: kubernetes-system-controller-manager - rules: - - alert: KubeControllerManagerDown - annotations: - description: KubeControllerManager has disappeared from Prometheus target discovery. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontrollermanagerdown - summary: Target disappeared from Prometheus target discovery. - expr: absent(up{job="kube-controller-manager"} == 1) - for: 15m - labels: - severity: critical \ No newline at end of file diff --git a/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-scheduler.yaml b/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-scheduler.yaml deleted file mode 100644 index 7c5e58d..0000000 --- a/monitor/monitor-control-plan/monitoring.coreos.com_v1_PrometheusRule_prometheus-community-kube-kubernetes-system-scheduler.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-scheduler.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: prometheus-community-kube-kubernetes-system-scheduler - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -spec: - groups: - - name: kubernetes-system-scheduler - rules: - - alert: KubeSchedulerDown - annotations: - description: KubeScheduler has disappeared from Prometheus target discovery. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeschedulerdown - summary: Target disappeared from Prometheus target discovery. - expr: absent(up{job="kube-scheduler"} == 1) - for: 15m - labels: - severity: critical \ No newline at end of file diff --git a/monitor/monitor-control-plan/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-apiserver.yaml b/monitor/monitor-control-plan/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-apiserver.yaml deleted file mode 100644 index 298535d..0000000 --- a/monitor/monitor-control-plan/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-apiserver.yaml +++ /dev/null @@ -1,40 +0,0 @@ -# Source: kube-prometheus-stack/templates/exporters/kube-api-server/servicemonitor.yaml -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: prometheus-community-kube-apiserver - namespace: vynil-monitor - labels: - app: kube-prometheus-stack-apiserver - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -spec: - - endpoints: - - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - port: https - scheme: https - metricRelabelings: - - action: drop - regex: apiserver_request_duration_seconds_bucket;(0.15|0.2|0.3|0.35|0.4|0.45|0.6|0.7|0.8|0.9|1.25|1.5|1.75|2|3|3.5|4|4.5|6|7|8|9|15|25|40|50) - sourceLabels: - - __name__ - - le - tlsConfig: - caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - serverName: kubernetes - insecureSkipVerify: false - jobLabel: component - namespaceSelector: - matchNames: - - default - selector: - matchLabels: - component: apiserver - provider: kubernetes \ No newline at end of file diff --git a/monitor/monitor-control-plan/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-kube-controller-manager.yaml b/monitor/monitor-control-plan/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-kube-controller-manager.yaml deleted file mode 100644 index c844853..0000000 --- a/monitor/monitor-control-plan/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-kube-controller-manager.yaml +++ /dev/null @@ -1,33 +0,0 @@ -# Source: kube-prometheus-stack/templates/exporters/kube-controller-manager/servicemonitor.yaml -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: prometheus-community-kube-kube-controller-manager - namespace: vynil-monitor - labels: - app: kube-prometheus-stack-kube-controller-manager - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -spec: - jobLabel: jobLabel - - selector: - matchLabels: - app: kube-prometheus-stack-kube-controller-manager - release: "prometheus-community" - namespaceSelector: - matchNames: - - "kube-system" - endpoints: - - port: http-metrics - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - scheme: https - tlsConfig: - caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - insecureSkipVerify: true \ No newline at end of file diff --git a/monitor/monitor-control-plan/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-kube-etcd.yaml b/monitor/monitor-control-plan/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-kube-etcd.yaml deleted file mode 100644 index 0fda783..0000000 --- a/monitor/monitor-control-plan/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-kube-etcd.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# Source: kube-prometheus-stack/templates/exporters/kube-etcd/servicemonitor.yaml -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: prometheus-community-kube-kube-etcd - namespace: vynil-monitor - labels: - app: kube-prometheus-stack-kube-etcd - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -spec: - jobLabel: jobLabel - - selector: - matchLabels: - app: kube-prometheus-stack-kube-etcd - release: "prometheus-community" - namespaceSelector: - matchNames: - - "kube-system" - endpoints: - - port: http-metrics - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token \ No newline at end of file diff --git a/monitor/monitor-control-plan/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-kube-scheduler.yaml b/monitor/monitor-control-plan/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-kube-scheduler.yaml deleted file mode 100644 index 0681073..0000000 --- a/monitor/monitor-control-plan/monitoring.coreos.com_v1_ServiceMonitor_prometheus-community-kube-kube-scheduler.yaml +++ /dev/null @@ -1,33 +0,0 @@ -# Source: kube-prometheus-stack/templates/exporters/kube-scheduler/servicemonitor.yaml -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: prometheus-community-kube-kube-scheduler - namespace: vynil-monitor - labels: - app: kube-prometheus-stack-kube-scheduler - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -spec: - jobLabel: jobLabel - - selector: - matchLabels: - app: kube-prometheus-stack-kube-scheduler - release: "prometheus-community" - namespaceSelector: - matchNames: - - "kube-system" - endpoints: - - port: http-metrics - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - scheme: https - tlsConfig: - caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - insecureSkipVerify: true \ No newline at end of file diff --git a/monitor/monitor-control-plan/ressources_no_ns.tf b/monitor/monitor-control-plan/ressources_no_ns.tf deleted file mode 100644 index 9fa58b7..0000000 --- a/monitor/monitor-control-plan/ressources_no_ns.tf +++ /dev/null @@ -1,45 +0,0 @@ - -# first loop through resources in ids_prio[0] -resource "kustomization_resource" "pre_no_ns" { - for_each = data.kustomization_overlay.data_no_ns.ids_prio[0] - - manifest = ( - contains(["_/Secret"], regex("(?P.*/.*)/.*/.*", each.value)["group_kind"]) - ? sensitive(data.kustomization_overlay.data_no_ns.manifests[each.value]) - : data.kustomization_overlay.data_no_ns.manifests[each.value] - ) -} - -# then loop through resources in ids_prio[1] -# and set an explicit depends_on on kustomization_resource.pre -# wait 2 minutes for any deployment or daemonset to become ready -resource "kustomization_resource" "main_no_ns" { - for_each = data.kustomization_overlay.data_no_ns.ids_prio[1] - - manifest = ( - contains(["_/Secret"], regex("(?P.*/.*)/.*/.*", each.value)["group_kind"]) - ? sensitive(data.kustomization_overlay.data_no_ns.manifests[each.value]) - : data.kustomization_overlay.data_no_ns.manifests[each.value] - ) - wait = true - timeouts { - create = "5m" - update = "5m" - } - - depends_on = [kustomization_resource.pre_no_ns] -} - -# finally, loop through resources in ids_prio[2] -# and set an explicit depends_on on kustomization_resource.main -resource "kustomization_resource" "post_no_ns" { - for_each = data.kustomization_overlay.data_no_ns.ids_prio[2] - - manifest = ( - contains(["_/Secret"], regex("(?P.*/.*)/.*/.*", each.value)["group_kind"]) - ? sensitive(data.kustomization_overlay.data_no_ns.manifests[each.value]) - : data.kustomization_overlay.data_no_ns.manifests[each.value] - ) - - depends_on = [kustomization_resource.main_no_ns] -} diff --git a/monitor/monitor-control-plan/v1_ConfigMap_prometheus-community-kube-apiserver.yaml b/monitor/monitor-control-plan/v1_ConfigMap_prometheus-community-kube-apiserver.yaml deleted file mode 100644 index f196aa2..0000000 --- a/monitor/monitor-control-plan/v1_ConfigMap_prometheus-community-kube-apiserver.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/apiserver.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - namespace: vynil-monitor - name: prometheus-community-kube-apiserver - annotations: - {} - labels: - grafana_dashboard: "1" - app: kube-prometheus-stack-grafana - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -data: - apiserver.json: |- - {"__inputs":[],"__requires":[],"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"id":null,"links":[],"panels":[{"content":"The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only.","datasource":null,"description":"The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only.","gridPos":{"h":2,"w":24,"x":0,"y":0},"id":2,"mode":"markdown","span":12,"title":"Notice","type":"text"}],"refresh":"10s","rows":[{"collapse":false,"collapsed":false,"panels":[{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"$datasource","decimals":3,"description":"How many percent of requests (both read and write) in 30 days have been answered successfully and fast enough?","format":"percentunit","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{},"id":3,"interval":"1m","legend":{"alignAsTable":true,"rightSide":true},"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":4,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"apiserver_request:availability30d{verb=\"all\", cluster=\"$cluster\"}","format":"time_series","intervalFactor":2,"legendFormat":"","refId":"A"}],"thresholds":"","title":"Availability (30d) > 99.000%","tooltip":{"shared":false},"type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","decimals":3,"description":"How much error budget is left looking at our 0.990% availability guarantees?","fill":10,"fillGradient":0,"gridPos":{},"id":4,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":8,"stack":false,"steppedLine":false,"targets":[{"expr":"100 * (apiserver_request:availability30d{verb=\"all\", cluster=\"$cluster\"} - 0.990000)","format":"time_series","intervalFactor":2,"legendFormat":"errorbudget","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"ErrorBudget (30d) > 99.000%","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"decimals":3,"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true},{"decimals":3,"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Dashboard Row","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"$datasource","decimals":3,"description":"How many percent of read requests (LIST,GET) in 30 days have been answered successfully and fast enough?","format":"percentunit","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{},"id":5,"interval":"1m","legend":{"alignAsTable":true,"rightSide":true},"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":3,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"apiserver_request:availability30d{verb=\"read\", cluster=\"$cluster\"}","format":"time_series","intervalFactor":2,"legendFormat":"","refId":"A"}],"thresholds":"","title":"Read Availability (30d)","tooltip":{"shared":false},"type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","description":"How many read requests (LIST,GET) per second do the apiservers get by code?","fill":10,"fillGradient":0,"gridPos":{},"id":6,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[{"alias":"/2../i","color":"#56A64B"},{"alias":"/3../i","color":"#F2CC0C"},{"alias":"/4../i","color":"#3274D9"},{"alias":"/5../i","color":"#E02F44"}],"spaceLength":10,"span":3,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by (code) (code_resource:apiserver_request_total:rate5m{verb=\"read\", cluster=\"$cluster\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{ code }}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Read SLI - Requests","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"reqps","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"reqps","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","description":"How many percent of read requests (LIST,GET) per second are returned with errors (5xx)?","fill":1,"fillGradient":0,"gridPos":{},"id":7,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":3,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"read\",code=~\"5..\", cluster=\"$cluster\"}) / sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"read\", cluster=\"$cluster\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{ resource }}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Read SLI - Errors","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"percentunit","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","description":"How many seconds is the 99th percentile for reading (LIST|GET) a given resource?","fill":1,"fillGradient":0,"gridPos":{},"id":8,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":3,"stack":false,"steppedLine":false,"targets":[{"expr":"cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile{verb=\"read\", cluster=\"$cluster\"}","format":"time_series","intervalFactor":2,"legendFormat":"{{ resource }}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Read SLI - Duration","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Dashboard Row","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"$datasource","decimals":3,"description":"How many percent of write requests (POST|PUT|PATCH|DELETE) in 30 days have been answered successfully and fast enough?","format":"percentunit","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{},"id":9,"interval":"1m","legend":{"alignAsTable":true,"rightSide":true},"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":3,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"apiserver_request:availability30d{verb=\"write\", cluster=\"$cluster\"}","format":"time_series","intervalFactor":2,"legendFormat":"","refId":"A"}],"thresholds":"","title":"Write Availability (30d)","tooltip":{"shared":false},"type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","description":"How many write requests (POST|PUT|PATCH|DELETE) per second do the apiservers get by code?","fill":10,"fillGradient":0,"gridPos":{},"id":10,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[{"alias":"/2../i","color":"#56A64B"},{"alias":"/3../i","color":"#F2CC0C"},{"alias":"/4../i","color":"#3274D9"},{"alias":"/5../i","color":"#E02F44"}],"spaceLength":10,"span":3,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by (code) (code_resource:apiserver_request_total:rate5m{verb=\"write\", cluster=\"$cluster\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{ code }}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Write SLI - Requests","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"reqps","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"reqps","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","description":"How many percent of write requests (POST|PUT|PATCH|DELETE) per second are returned with errors (5xx)?","fill":1,"fillGradient":0,"gridPos":{},"id":11,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":3,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"write\",code=~\"5..\", cluster=\"$cluster\"}) / sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"write\", cluster=\"$cluster\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{ resource }}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Write SLI - Errors","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"percentunit","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","description":"How many seconds is the 99th percentile for writing (POST|PUT|PATCH|DELETE) a given resource?","fill":1,"fillGradient":0,"gridPos":{},"id":12,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":3,"stack":false,"steppedLine":false,"targets":[{"expr":"cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile{verb=\"write\", cluster=\"$cluster\"}","format":"time_series","intervalFactor":2,"legendFormat":"{{ resource }}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Write SLI - Duration","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Dashboard Row","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":13,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":false,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(workqueue_adds_total{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name)","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} {{name}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Work Queue Add Rate","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"ops","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":14,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":false,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(workqueue_depth{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name)","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} {{name}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Work Queue Depth","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":15,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name, le))","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} {{name}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Work Queue Latency","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Dashboard Row","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":16,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"process_resident_memory_bytes{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":17,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"rate(process_cpu_seconds_total{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU usage","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":18,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"go_goroutines{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Goroutines","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Dashboard Row","titleSize":"h6","type":"row"}],"schemaVersion":14,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{},"datasource":"$datasource","hide":2,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(up{job=\"apiserver\"}, cluster)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{},"datasource":"$datasource","hide":0,"includeAll":true,"label":null,"multi":false,"name":"instance","options":[],"query":"label_values(up{job=\"apiserver\", cluster=\"$cluster\"}, instance)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Kubernetes / API server","uid":"09ec8aa1e996d6ffcd6817bbaff4db1b","version":0} \ No newline at end of file diff --git a/monitor/monitor-control-plan/v1_ConfigMap_prometheus-community-kube-controller-manager.yaml b/monitor/monitor-control-plan/v1_ConfigMap_prometheus-community-kube-controller-manager.yaml deleted file mode 100644 index 6da6e6a..0000000 --- a/monitor/monitor-control-plan/v1_ConfigMap_prometheus-community-kube-controller-manager.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/controller-manager.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - namespace: vynil-monitor - name: prometheus-community-kube-controller-manager - annotations: - {} - labels: - grafana_dashboard: "1" - app: kube-prometheus-stack-grafana - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -data: - controller-manager.json: |- - {"__inputs":[],"__requires":[],"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"id":null,"links":[],"refresh":"10s","rows":[{"collapse":false,"collapsed":false,"panels":[{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"$datasource","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{},"id":2,"interval":"1m","legend":{"alignAsTable":true,"rightSide":true},"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":2,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum(up{cluster=\"$cluster\", job=\"kube-controller-manager\"})","format":"time_series","intervalFactor":2,"legendFormat":"","refId":"A"}],"thresholds":"","title":"Up","tooltip":{"shared":false},"type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"min"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":3,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name)","format":"time_series","intervalFactor":2,"legendFormat":"{{cluster}} {{instance}} {{name}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Work Queue Add Rate","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Dashboard Row","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":4,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name)","format":"time_series","intervalFactor":2,"legendFormat":"{{cluster}} {{instance}} {{name}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Work Queue Depth","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Dashboard Row","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":5,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name, le))","format":"time_series","intervalFactor":2,"legendFormat":"{{cluster}} {{instance}} {{name}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Work Queue Latency","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Dashboard Row","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":6,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"2xx","refId":"A"},{"expr":"sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"3xx","refId":"B"},{"expr":"sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"4xx","refId":"C"},{"expr":"sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"5xx","refId":"D"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Kube API Request Rate","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":7,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":8,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, url, le))","format":"time_series","intervalFactor":2,"legendFormat":"{{verb}} {{url}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Post Request Latency 99th Quantile","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"s","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Dashboard Row","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":8,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))","format":"time_series","intervalFactor":2,"legendFormat":"{{verb}} {{url}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Get Request Latency 99th Quantile","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"s","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Dashboard Row","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":9,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"process_resident_memory_bytes{cluster=\"$cluster\", job=\"kube-controller-manager\",instance=~\"$instance\"}","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":10,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-controller-manager\",instance=~\"$instance\"}[$__rate_interval])","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU usage","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":11,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"go_goroutines{cluster=\"$cluster\", job=\"kube-controller-manager\",instance=~\"$instance\"}","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Goroutines","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Dashboard Row","titleSize":"h6","type":"row"}],"schemaVersion":14,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{},"datasource":"$datasource","hide":2,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(up{job=\"kube-controller-manager\"}, cluster)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{},"datasource":"$datasource","hide":0,"includeAll":true,"label":null,"multi":false,"name":"instance","options":[],"query":"label_values(up{cluster=\"$cluster\", job=\"kube-controller-manager\"}, instance)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Kubernetes / Controller Manager","uid":"72e0e05bef5099e5f049b05fdc429ed4","version":0} \ No newline at end of file diff --git a/monitor/monitor-control-plan/v1_ConfigMap_prometheus-community-kube-etcd.yaml b/monitor/monitor-control-plan/v1_ConfigMap_prometheus-community-kube-etcd.yaml deleted file mode 100644 index ad7ade6..0000000 --- a/monitor/monitor-control-plan/v1_ConfigMap_prometheus-community-kube-etcd.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/etcd.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - namespace: vynil-monitor - name: prometheus-community-kube-etcd - annotations: - {} - labels: - grafana_dashboard: "1" - app: kube-prometheus-stack-grafana - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -data: - etcd.json: |- - {"description":"etcd sample Grafana dashboard with Prometheus","panels":[{"datasource":{"type":"datasource","uid":"-- Mixed --"},"gridPos":{"h":7,"w":6,"x":0,"y":0},"id":1,"interval":"1m","options":{"colorMode":"none","graphMode":"none","reduceOptions":{"calcs":["lastNotNull"]}},"pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum(etcd_server_has_leader{job=~\".*etcd.*\", job=\"$cluster\"})","legendFormat":"{{cluster}} - {{namespace}}\n"}],"title":"Up","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"},"unit":"ops"}},"gridPos":{"h":7,"w":10,"x":6,"y":0},"id":2,"interval":"1m","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum(rate(grpc_server_started_total{job=~\".*etcd.*\", job=\"$cluster\",grpc_type=\"unary\"}[$__rate_interval]))","legendFormat":"RPC rate"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum(rate(grpc_server_handled_total{job=~\".*etcd.*\", job=\"$cluster\",grpc_type=\"unary\",grpc_code=~\"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded\"}[$__rate_interval]))","legendFormat":"RPC failed rate"}],"title":"RPC rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"}}},"gridPos":{"h":7,"w":8,"x":16,"y":0},"id":3,"interval":"1m","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum(grpc_server_started_total{job=~\".*etcd.*\",job=\"$cluster\",grpc_service=\"etcdserverpb.Watch\",grpc_type=\"bidi_stream\"}) - sum(grpc_server_handled_total{job=\"$cluster\",grpc_service=\"etcdserverpb.Watch\",grpc_type=\"bidi_stream\"})","legendFormat":"Watch streams"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum(grpc_server_started_total{job=~\".*etcd.*\",job=\"$cluster\",grpc_service=\"etcdserverpb.Lease\",grpc_type=\"bidi_stream\"}) - sum(grpc_server_handled_total{job=\"$cluster\",grpc_service=\"etcdserverpb.Lease\",grpc_type=\"bidi_stream\"})","legendFormat":"Lease streams"}],"title":"Active streams","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"},"unit":"bytes"}},"gridPos":{"h":7,"w":8,"x":0,"y":25},"id":4,"interval":"1m","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"etcd_mvcc_db_total_size_in_bytes{job=~\".*etcd.*\", job=\"$cluster\"}","legendFormat":"{{instance}} DB size"}],"title":"DB size","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"},"unit":"s"}},"gridPos":{"h":7,"w":8,"x":8,"y":25},"id":5,"interval":"1m","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~\".*etcd.*\", job=\"$cluster\"}[$__rate_interval])) by (instance, le))","legendFormat":"{{instance}} WAL fsync"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~\".*etcd.*\", job=\"$cluster\"}[$__rate_interval])) by (instance, le))","legendFormat":"{{instance}} DB fsync"}],"title":"Disk sync duration","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"},"unit":"bytes"}},"gridPos":{"h":7,"w":8,"x":16,"y":25},"id":6,"interval":"1m","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"process_resident_memory_bytes{job=~\".*etcd.*\", job=\"$cluster\"}","legendFormat":"{{instance}} resident memory"}],"title":"Memory","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"},"unit":"Bps"}},"gridPos":{"h":7,"w":6,"x":0,"y":50},"id":7,"interval":"1m","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(etcd_network_client_grpc_received_bytes_total{job=~\".*etcd.*\", job=\"$cluster\"}[$__rate_interval])","legendFormat":"{{instance}} client traffic in"}],"title":"Client traffic in","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"},"unit":"Bps"}},"gridPos":{"h":7,"w":6,"x":6,"y":50},"id":8,"interval":"1m","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(etcd_network_client_grpc_sent_bytes_total{job=~\".*etcd.*\", job=\"$cluster\"}[$__rate_interval])","legendFormat":"{{instance}} client traffic out"}],"title":"Client traffic out","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"},"unit":"Bps"}},"gridPos":{"h":7,"w":6,"x":12,"y":50},"id":9,"interval":"1m","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum(rate(etcd_network_peer_received_bytes_total{job=~\".*etcd.*\", job=\"$cluster\"}[$__rate_interval])) by (instance)","legendFormat":"{{instance}} peer traffic in"}],"title":"Peer traffic in","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"},"unit":"Bps"}},"gridPos":{"h":7,"w":6,"x":18,"y":50},"id":10,"interval":"1m","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum(rate(etcd_network_peer_sent_bytes_total{job=~\".*etcd.*\", job=\"$cluster\"}[$__rate_interval])) by (instance)","legendFormat":"{{instance}} peer traffic out"}],"title":"Peer traffic out","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"}}},"gridPos":{"h":7,"w":8,"x":0,"y":75},"id":11,"interval":"1m","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"changes(etcd_server_leader_changes_seen_total{job=~\".*etcd.*\", job=\"$cluster\"}[1d])","legendFormat":"{{instance}} total leader elections per day"}],"title":"Raft proposals","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"}}},"gridPos":{"h":7,"w":8,"x":8,"y":75},"id":12,"interval":"1m","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"changes(etcd_server_leader_changes_seen_total{job=~\".*etcd.*\", job=\"$cluster\"}[1d])","legendFormat":"{{instance}} total leader elections per day"}],"title":"Total leader elections per day","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"},"unit":"s"}},"gridPos":{"h":7,"w":8,"x":16,"y":75},"id":13,"interval":"1m","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"histogram_quantile(0.99, sum by (instance, le) (rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~\".*etcd.*\", job=\"$cluster\"}[$__rate_interval])))","legendFormat":"{{instance}} peer round trip time"}],"title":"Peer round trip time","type":"timeseries"}],"refresh":"10s","schemaVersion":36,"tags":["etcd-mixin"],"templating":{"list":[{"label":"Data Source","name":"datasource","query":"prometheus","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"label":"cluster","name":"cluster","query":"label_values(etcd_server_has_leader{job=~\".*etcd.*\"}, job)","refresh":2,"type":"query","hide":2}]},"time":{"from":"now-15m","to":"now"},"timezone": "Europe/Paris","title":"etcd","uid":"c2f4e12cdf69feb95caa41a5a1b423d9"} \ No newline at end of file diff --git a/monitor/monitor-control-plan/v1_ConfigMap_prometheus-community-kube-scheduler.yaml b/monitor/monitor-control-plan/v1_ConfigMap_prometheus-community-kube-scheduler.yaml deleted file mode 100644 index b01fa26..0000000 --- a/monitor/monitor-control-plan/v1_ConfigMap_prometheus-community-kube-scheduler.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/scheduler.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - namespace: vynil-monitor - name: prometheus-community-kube-scheduler - annotations: - {} - labels: - grafana_dashboard: "1" - app: kube-prometheus-stack-grafana - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -data: - scheduler.json: |- - {"__inputs":[],"__requires":[],"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"id":null,"links":[],"refresh":"10s","rows":[{"collapse":false,"collapsed":false,"panels":[{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"$datasource","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{},"id":2,"interval":"1m","legend":{"alignAsTable":true,"rightSide":true},"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":2,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum(up{cluster=\"$cluster\", job=\"kube-scheduler\"})","format":"time_series","intervalFactor":2,"legendFormat":"","refId":"A"}],"thresholds":"","title":"Up","tooltip":{"shared":false},"type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"min"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":3,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":5,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)","format":"time_series","intervalFactor":2,"legendFormat":"{{cluster}} {{instance}} e2e","refId":"A"},{"expr":"sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)","format":"time_series","intervalFactor":2,"legendFormat":"{{cluster}} {{instance}} binding","refId":"B"},{"expr":"sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)","format":"time_series","intervalFactor":2,"legendFormat":"{{cluster}} {{instance}} scheduling algorithm","refId":"C"},{"expr":"sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)","format":"time_series","intervalFactor":2,"legendFormat":"{{cluster}} {{instance}} volume","refId":"D"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Scheduling Rate","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"ops","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":4,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":5,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))","format":"time_series","intervalFactor":2,"legendFormat":"{{cluster}} {{instance}} e2e","refId":"A"},{"expr":"histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))","format":"time_series","intervalFactor":2,"legendFormat":"{{cluster}} {{instance}} binding","refId":"B"},{"expr":"histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))","format":"time_series","intervalFactor":2,"legendFormat":"{{cluster}} {{instance}} scheduling algorithm","refId":"C"},{"expr":"histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))","format":"time_series","intervalFactor":2,"legendFormat":"{{cluster}} {{instance}} volume","refId":"D"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Scheduling latency 99th Quantile","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"s","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Dashboard Row","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":5,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"2xx","refId":"A"},{"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"3xx","refId":"B"},{"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"4xx","refId":"C"},{"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"5xx","refId":"D"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Kube API Request Rate","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"ops","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":6,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":8,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, url, le))","format":"time_series","intervalFactor":2,"legendFormat":"{{verb}} {{url}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Post Request Latency 99th Quantile","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"s","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Dashboard Row","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":7,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))","format":"time_series","intervalFactor":2,"legendFormat":"{{verb}} {{url}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Get Request Latency 99th Quantile","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"s","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Dashboard Row","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":8,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"process_resident_memory_bytes{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":9,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU usage","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":10,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"go_goroutines{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Goroutines","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Dashboard Row","titleSize":"h6","type":"row"}],"schemaVersion":14,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{},"datasource":"$datasource","hide":2,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(up{job=\"kube-scheduler\"}, cluster)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{},"datasource":"$datasource","hide":0,"includeAll":true,"label":null,"multi":false,"name":"instance","options":[],"query":"label_values(up{job=\"kube-scheduler\", cluster=\"$cluster\"}, instance)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Kubernetes / Scheduler","uid":"2e6b6a3b4bddf1427b3a55aa1311c656","version":0} \ No newline at end of file diff --git a/monitor/monitor-control-plan/v1_Service_prometheus-community-kube-kube-controller-manager.yaml b/monitor/monitor-control-plan/v1_Service_prometheus-community-kube-kube-controller-manager.yaml deleted file mode 100644 index 809256c..0000000 --- a/monitor/monitor-control-plan/v1_Service_prometheus-community-kube-kube-controller-manager.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# Source: kube-prometheus-stack/templates/exporters/kube-controller-manager/service.yaml -apiVersion: v1 -kind: Service -metadata: - name: prometheus-community-kube-kube-controller-manager - labels: - app: kube-prometheus-stack-kube-controller-manager - jobLabel: kube-controller-manager - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" - namespace: kube-system -spec: - clusterIP: None - ports: - - name: http-metrics - port: 10257 - protocol: TCP - targetPort: 10257 - selector: - component: kube-controller-manager - type: ClusterIP \ No newline at end of file diff --git a/monitor/monitor-control-plan/v1_Service_prometheus-community-kube-kube-etcd.yaml b/monitor/monitor-control-plan/v1_Service_prometheus-community-kube-kube-etcd.yaml deleted file mode 100644 index 681444c..0000000 --- a/monitor/monitor-control-plan/v1_Service_prometheus-community-kube-kube-etcd.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# Source: kube-prometheus-stack/templates/exporters/kube-etcd/service.yaml -apiVersion: v1 -kind: Service -metadata: - name: prometheus-community-kube-kube-etcd - labels: - app: kube-prometheus-stack-kube-etcd - jobLabel: kube-etcd - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" - namespace: kube-system -spec: - clusterIP: None - ports: - - name: http-metrics - port: 2381 - protocol: TCP - targetPort: 2381 - selector: - component: etcd - type: ClusterIP \ No newline at end of file diff --git a/monitor/monitor-control-plan/v1_Service_prometheus-community-kube-kube-scheduler.yaml b/monitor/monitor-control-plan/v1_Service_prometheus-community-kube-kube-scheduler.yaml deleted file mode 100644 index 6fbe33c..0000000 --- a/monitor/monitor-control-plan/v1_Service_prometheus-community-kube-kube-scheduler.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# Source: kube-prometheus-stack/templates/exporters/kube-scheduler/service.yaml -apiVersion: v1 -kind: Service -metadata: - name: prometheus-community-kube-kube-scheduler - labels: - app: kube-prometheus-stack-kube-scheduler - jobLabel: kube-scheduler - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" - namespace: kube-system -spec: - clusterIP: None - ports: - - name: http-metrics - port: 10259 - protocol: TCP - targetPort: 10259 - selector: - component: kube-scheduler - type: ClusterIP \ No newline at end of file diff --git a/monitor/node-exporter/apps_v1_DaemonSet_node-exporter-prometheus-node-exporter.yaml b/monitor/node-exporter/apps_v1_DaemonSet_node-exporter-prometheus-node-exporter.yaml deleted file mode 100644 index d919580..0000000 --- a/monitor/node-exporter/apps_v1_DaemonSet_node-exporter-prometheus-node-exporter.yaml +++ /dev/null @@ -1,119 +0,0 @@ -# Source: kube-prometheus-stack/charts/prometheus-node-exporter/templates/daemonset.yaml -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: node-exporter-prometheus-node-exporter - namespace: vynil-monitor - labels: - helm.sh/chart: prometheus-node-exporter-4.26.0 - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: metrics - app.kubernetes.io/part-of: prometheus-node-exporter - app.kubernetes.io/name: prometheus-node-exporter - app.kubernetes.io/instance: node-exporter - app.kubernetes.io/version: "1.7.0" - jobLabel: node-exporter - release: node-exporter -spec: - selector: - matchLabels: - app.kubernetes.io/name: prometheus-node-exporter - app.kubernetes.io/instance: node-exporter - revisionHistoryLimit: 10 - updateStrategy: - rollingUpdate: - maxUnavailable: 1 - type: RollingUpdate - template: - metadata: - annotations: - cluster-autoscaler.kubernetes.io/safe-to-evict: "true" - labels: - helm.sh/chart: prometheus-node-exporter-4.26.0 - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: metrics - app.kubernetes.io/part-of: prometheus-node-exporter - app.kubernetes.io/name: prometheus-node-exporter - app.kubernetes.io/instance: node-exporter - app.kubernetes.io/version: "1.7.0" - jobLabel: node-exporter - release: node-exporter - spec: - automountServiceAccountToken: false - securityContext: - fsGroup: 65534 - runAsGroup: 65534 - runAsNonRoot: true - runAsUser: 65534 - serviceAccountName: node-exporter-prometheus-node-exporter - containers: - - name: node-exporter - image: quay.io/prometheus/node-exporter:v1.7.0 - imagePullPolicy: IfNotPresent - args: - - --path.procfs=/host/proc - - --path.sysfs=/host/sys - - --path.rootfs=/host/root - - --path.udev.data=/host/root/run/udev/data - - --web.listen-address=[$(HOST_IP)]:9100 - - --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/) - - --collector.filesystem.fs-types-exclude=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$ - securityContext: - readOnlyRootFilesystem: true - env: - - name: HOST_IP - value: 0.0.0.0 - ports: - - name: http-metrics - containerPort: 9100 - protocol: TCP - livenessProbe: - failureThreshold: 3 - httpGet: - httpHeaders: - path: / - port: 9100 - scheme: HTTP - initialDelaySeconds: 0 - periodSeconds: 10 - successThreshold: 1 - timeoutSeconds: 1 - readinessProbe: - failureThreshold: 3 - httpGet: - httpHeaders: - path: / - port: 9100 - scheme: HTTP - initialDelaySeconds: 0 - periodSeconds: 10 - successThreshold: 1 - timeoutSeconds: 1 - volumeMounts: - - name: proc - mountPath: /host/proc - readOnly: true - - name: sys - mountPath: /host/sys - readOnly: true - - name: root - mountPath: /host/root - mountPropagation: HostToContainer - readOnly: true - hostNetwork: true - hostPID: true - nodeSelector: - kubernetes.io/os: linux - tolerations: - - effect: NoSchedule - operator: Exists - volumes: - - name: proc - hostPath: - path: /proc - - name: sys - hostPath: - path: /sys - - name: root - hostPath: - path: / \ No newline at end of file diff --git a/monitor/node-exporter/datas.tf b/monitor/node-exporter/datas.tf deleted file mode 100644 index 7068eea..0000000 --- a/monitor/node-exporter/datas.tf +++ /dev/null @@ -1,34 +0,0 @@ -locals { - common-labels = { - "vynil.solidite.fr/owner-name" = var.instance - "vynil.solidite.fr/owner-namespace" = var.namespace - "vynil.solidite.fr/owner-category" = var.category - "vynil.solidite.fr/owner-component" = var.component - "app.kubernetes.io/managed-by" = "vynil" - "app.kubernetes.io/instance" = var.instance - } -} - -data "kustomization_overlay" "data" { - common_labels = local.common-labels - namespace = var.namespace - resources = [for file in fileset(path.module, "*.yaml"): file if file != "index.yaml"] - images { - name = "quay.io/prometheus/node-exporter" - new_name = "${var.images.node-exporter.registry}/${var.images.node-exporter.repository}" - new_tag = "${var.images.node-exporter.tag}" - } - patches { - target { - kind = "ServiceMonitor" - name = "node-exporter-prometheus-node-exporter" - } - patch = <<-EOF - - op: replace - path: /spec/selector/matchLabels/app.kubernetes.io~1instance - value: "${var.instance}" - EOF - } -} - - diff --git a/monitor/node-exporter/index.yaml b/monitor/node-exporter/index.yaml deleted file mode 100644 index 2b6f7fe..0000000 --- a/monitor/node-exporter/index.yaml +++ /dev/null @@ -1,57 +0,0 @@ ---- -apiVersion: vinyl.solidite.fr/v1beta1 -kind: Component -category: monitor -metadata: - name: node-exporter - description: null -options: - images: - default: - node-exporter: - pullPolicy: IfNotPresent - registry: quay.io - repository: prometheus/node-exporter - tag: v1.7.0 - examples: - - node-exporter: - pullPolicy: IfNotPresent - registry: quay.io - repository: prometheus/node-exporter - tag: v1.7.0 - properties: - node-exporter: - default: - pullPolicy: IfNotPresent - registry: quay.io - repository: prometheus/node-exporter - tag: v1.7.0 - properties: - pullPolicy: - default: IfNotPresent - enum: - - Always - - Never - - IfNotPresent - type: string - registry: - default: quay.io - type: string - repository: - default: prometheus/node-exporter - type: string - tag: - default: v1.7.0 - type: string - type: object - type: object -dependencies: [] -providers: - kubernetes: true - authentik: null - kubectl: true - postgresql: null - restapi: null - http: null - gitea: null -tfaddtype: null diff --git a/monitor/node-exporter/monitoring.coreos.com_v1_PrometheusRule_node-exporter-kube-prometh-node-exporter.rules.yaml b/monitor/node-exporter/monitoring.coreos.com_v1_PrometheusRule_node-exporter-kube-prometh-node-exporter.rules.yaml deleted file mode 100644 index 9242545..0000000 --- a/monitor/node-exporter/monitoring.coreos.com_v1_PrometheusRule_node-exporter-kube-prometh-node-exporter.rules.yaml +++ /dev/null @@ -1,82 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.rules.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: node-exporter-kube-prometh-node-exporter.rules - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: node-exporter - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "node-exporter" - heritage: "Helm" -spec: - groups: - - name: node-exporter.rules - rules: - - expr: |- - count without (cpu, mode) ( - node_cpu_seconds_total{job="node-exporter",mode="idle"} - ) - record: instance:node_num_cpu:sum - - expr: |- - 1 - avg without (cpu) ( - sum without (mode) (rate(node_cpu_seconds_total{job="node-exporter", mode=~"idle|iowait|steal"}[5m])) - ) - record: instance:node_cpu_utilisation:rate5m - - expr: |- - ( - node_load1{job="node-exporter"} - / - instance:node_num_cpu:sum{job="node-exporter"} - ) - record: instance:node_load1_per_cpu:ratio - - expr: |- - 1 - ( - ( - node_memory_MemAvailable_bytes{job="node-exporter"} - or - ( - node_memory_Buffers_bytes{job="node-exporter"} - + - node_memory_Cached_bytes{job="node-exporter"} - + - node_memory_MemFree_bytes{job="node-exporter"} - + - node_memory_Slab_bytes{job="node-exporter"} - ) - ) - / - node_memory_MemTotal_bytes{job="node-exporter"} - ) - record: instance:node_memory_utilisation:ratio - - expr: rate(node_vmstat_pgmajfault{job="node-exporter"}[5m]) - record: instance:node_vmstat_pgmajfault:rate5m - - expr: rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m]) - record: instance_device:node_disk_io_time_seconds:rate5m - - expr: rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m]) - record: instance_device:node_disk_io_time_weighted_seconds:rate5m - - expr: |- - sum without (device) ( - rate(node_network_receive_bytes_total{job="node-exporter", device!="lo"}[5m]) - ) - record: instance:node_network_receive_bytes_excluding_lo:rate5m - - expr: |- - sum without (device) ( - rate(node_network_transmit_bytes_total{job="node-exporter", device!="lo"}[5m]) - ) - record: instance:node_network_transmit_bytes_excluding_lo:rate5m - - expr: |- - sum without (device) ( - rate(node_network_receive_drop_total{job="node-exporter", device!="lo"}[5m]) - ) - record: instance:node_network_receive_drop_excluding_lo:rate5m - - expr: |- - sum without (device) ( - rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[5m]) - ) - record: instance:node_network_transmit_drop_excluding_lo:rate5m \ No newline at end of file diff --git a/monitor/node-exporter/monitoring.coreos.com_v1_PrometheusRule_node-exporter-kube-prometh-node-exporter.yaml b/monitor/node-exporter/monitoring.coreos.com_v1_PrometheusRule_node-exporter-kube-prometh-node-exporter.yaml deleted file mode 100644 index d8439f6..0000000 --- a/monitor/node-exporter/monitoring.coreos.com_v1_PrometheusRule_node-exporter-kube-prometh-node-exporter.yaml +++ /dev/null @@ -1,328 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: node-exporter-kube-prometh-node-exporter - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: node-exporter - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "node-exporter" - heritage: "Helm" -spec: - groups: - - name: node-exporter - rules: - - alert: NodeFilesystemSpaceFillingUp - annotations: - description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup - summary: Filesystem is predicted to run out of space within the next 24 hours. - expr: |- - ( - node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 15 - and - predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""}[6h], 24*60*60) < 0 - and - node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 - ) - for: 1h - labels: - severity: warning - - alert: NodeFilesystemSpaceFillingUp - annotations: - description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up fast. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup - summary: Filesystem is predicted to run out of space within the next 4 hours. - expr: |- - ( - node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 10 - and - predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""}[6h], 4*60*60) < 0 - and - node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 - ) - for: 1h - labels: - severity: critical - - alert: NodeFilesystemAlmostOutOfSpace - annotations: - description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace - summary: Filesystem has less than 5% space left. - expr: |- - ( - node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 5 - and - node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 - ) - for: 30m - labels: - severity: warning - - alert: NodeFilesystemAlmostOutOfSpace - annotations: - description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace - summary: Filesystem has less than 3% space left. - expr: |- - ( - node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 3 - and - node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 - ) - for: 30m - labels: - severity: critical - - alert: NodeFilesystemFilesFillingUp - annotations: - description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup - summary: Filesystem is predicted to run out of inodes within the next 24 hours. - expr: |- - ( - node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 40 - and - predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""}[6h], 24*60*60) < 0 - and - node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 - ) - for: 1h - labels: - severity: warning - - alert: NodeFilesystemFilesFillingUp - annotations: - description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up fast. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup - summary: Filesystem is predicted to run out of inodes within the next 4 hours. - expr: |- - ( - node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 20 - and - predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""}[6h], 4*60*60) < 0 - and - node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 - ) - for: 1h - labels: - severity: critical - - alert: NodeFilesystemAlmostOutOfFiles - annotations: - description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles - summary: Filesystem has less than 5% inodes left. - expr: |- - ( - node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 5 - and - node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 - ) - for: 1h - labels: - severity: warning - - alert: NodeFilesystemAlmostOutOfFiles - annotations: - description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles - summary: Filesystem has less than 3% inodes left. - expr: |- - ( - node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 3 - and - node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 - ) - for: 1h - labels: - severity: critical - - alert: NodeNetworkReceiveErrs - annotations: - description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} receive errors in the last two minutes.' - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworkreceiveerrs - summary: Network interface is reporting many receive errors. - expr: rate(node_network_receive_errs_total{job="node-exporter"}[2m]) / rate(node_network_receive_packets_total{job="node-exporter"}[2m]) > 0.01 - for: 1h - labels: - severity: warning - - alert: NodeNetworkTransmitErrs - annotations: - description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} transmit errors in the last two minutes.' - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworktransmiterrs - summary: Network interface is reporting many transmit errors. - expr: rate(node_network_transmit_errs_total{job="node-exporter"}[2m]) / rate(node_network_transmit_packets_total{job="node-exporter"}[2m]) > 0.01 - for: 1h - labels: - severity: warning - - alert: NodeHighNumberConntrackEntriesUsed - annotations: - description: '{{ $value | humanizePercentage }} of conntrack entries are used.' - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodehighnumberconntrackentriesused - summary: Number of conntrack are getting close to the limit. - expr: (node_nf_conntrack_entries{job="node-exporter"} / node_nf_conntrack_entries_limit) > 0.75 - labels: - severity: warning - - alert: NodeTextFileCollectorScrapeError - annotations: - description: Node Exporter text file collector on {{ $labels.instance }} failed to scrape. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodetextfilecollectorscrapeerror - summary: Node Exporter text file collector failed to scrape. - expr: node_textfile_scrape_error{job="node-exporter"} == 1 - labels: - severity: warning - - alert: NodeClockSkewDetected - annotations: - description: Clock at {{ $labels.instance }} is out of sync by more than 0.05s. Ensure NTP is configured correctly on this host. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclockskewdetected - summary: Clock skew detected. - expr: |- - ( - node_timex_offset_seconds{job="node-exporter"} > 0.05 - and - deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) >= 0 - ) - or - ( - node_timex_offset_seconds{job="node-exporter"} < -0.05 - and - deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) <= 0 - ) - for: 10m - labels: - severity: warning - - alert: NodeClockNotSynchronising - annotations: - description: Clock at {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclocknotsynchronising - summary: Clock not synchronising. - expr: |- - min_over_time(node_timex_sync_status{job="node-exporter"}[5m]) == 0 - and - node_timex_maxerror_seconds{job="node-exporter"} >= 16 - for: 10m - labels: - severity: warning - - alert: NodeRAIDDegraded - annotations: - description: RAID array '{{ $labels.device }}' at {{ $labels.instance }} is in degraded state due to one or more disks failures. Number of spare drives is insufficient to fix issue automatically. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddegraded - summary: RAID Array is degraded. - expr: node_md_disks_required{job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"} - ignoring (state) (node_md_disks{state="active",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}) > 0 - for: 15m - labels: - severity: critical - - alert: NodeRAIDDiskFailure - annotations: - description: At least one device in RAID array at {{ $labels.instance }} failed. Array '{{ $labels.device }}' needs attention and possibly a disk swap. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddiskfailure - summary: Failed device in RAID array. - expr: node_md_disks{state="failed",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"} > 0 - labels: - severity: warning - - alert: NodeFileDescriptorLimit - annotations: - description: File descriptors limit at {{ $labels.instance }} is currently at {{ printf "%.2f" $value }}%. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit - summary: Kernel is predicted to exhaust file descriptors limit soon. - expr: |- - ( - node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 70 - ) - for: 15m - labels: - severity: warning - - alert: NodeFileDescriptorLimit - annotations: - description: File descriptors limit at {{ $labels.instance }} is currently at {{ printf "%.2f" $value }}%. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit - summary: Kernel is predicted to exhaust file descriptors limit soon. - expr: |- - ( - node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 90 - ) - for: 15m - labels: - severity: critical - - alert: NodeCPUHighUsage - annotations: - description: 'CPU usage at {{ $labels.instance }} has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%. - - ' - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodecpuhighusage - summary: High CPU usage. - expr: sum without(mode) (avg without (cpu) (rate(node_cpu_seconds_total{job="node-exporter", mode!="idle"}[2m]))) * 100 > 90 - for: 15m - labels: - severity: info - - alert: NodeSystemSaturation - annotations: - description: 'System load per core at {{ $labels.instance }} has been above 2 for the last 15 minutes, is currently at {{ printf "%.2f" $value }}. - - This might indicate this instance resources saturation and can cause it becoming unresponsive. - - ' - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodesystemsaturation - summary: System saturated, load per core is very high. - expr: |- - node_load1{job="node-exporter"} - / count without (cpu, mode) (node_cpu_seconds_total{job="node-exporter", mode="idle"}) > 2 - for: 15m - labels: - severity: warning - - alert: NodeMemoryMajorPagesFaults - annotations: - description: 'Memory major pages are occurring at very high rate at {{ $labels.instance }}, 500 major page faults per second for the last 15 minutes, is currently at {{ printf "%.2f" $value }}. - - Please check that there is enough memory available at this instance. - - ' - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodememorymajorpagesfaults - summary: Memory major page faults are occurring at very high rate. - expr: rate(node_vmstat_pgmajfault{job="node-exporter"}[5m]) > 500 - for: 15m - labels: - severity: warning - - alert: NodeMemoryHighUtilization - annotations: - description: 'Memory is filling up at {{ $labels.instance }}, has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%. - - ' - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodememoryhighutilization - summary: Host is running out of memory. - expr: 100 - (node_memory_MemAvailable_bytes{job="node-exporter"} / node_memory_MemTotal_bytes{job="node-exporter"} * 100) > 90 - for: 15m - labels: - severity: warning - - alert: NodeDiskIOSaturation - annotations: - description: 'Disk IO queue (aqu-sq) is high on {{ $labels.device }} at {{ $labels.instance }}, has been above 10 for the last 15 minutes, is currently at {{ printf "%.2f" $value }}. - - This symptom might indicate disk saturation. - - ' - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodediskiosaturation - summary: Disk IO queue is high. - expr: rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m]) > 10 - for: 30m - labels: - severity: warning - - alert: NodeSystemdServiceFailed - annotations: - description: Systemd service {{ $labels.name }} has entered failed state at {{ $labels.instance }} - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodesystemdservicefailed - summary: Systemd service has entered failed state. - expr: node_systemd_unit_state{job="node-exporter", state="failed"} == 1 - for: 5m - labels: - severity: warning - - alert: NodeBondingDegraded - annotations: - description: Bonding interface {{ $labels.master }} on {{ $labels.instance }} is in degraded state due to one or more slave failures. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodebondingdegraded - summary: Bonding interface is degraded - expr: (node_bonding_slaves - node_bonding_active) != 0 - for: 5m - labels: - severity: warning \ No newline at end of file diff --git a/monitor/node-exporter/monitoring.coreos.com_v1_PrometheusRule_node-exporter-kube-prometh-node-network.yaml b/monitor/node-exporter/monitoring.coreos.com_v1_PrometheusRule_node-exporter-kube-prometh-node-network.yaml deleted file mode 100644 index cb1bdac..0000000 --- a/monitor/node-exporter/monitoring.coreos.com_v1_PrometheusRule_node-exporter-kube-prometh-node-network.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/node-network.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: node-exporter-kube-prometh-node-network - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: node-exporter - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "node-exporter" - heritage: "Helm" -spec: - groups: - - name: node-network - rules: - - alert: NodeNetworkInterfaceFlapping - annotations: - description: Network interface "{{ $labels.device }}" changing its up status often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }} - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/nodenetworkinterfaceflapping - summary: Network interface is often changing its status - expr: changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2 - for: 2m - labels: - severity: warning \ No newline at end of file diff --git a/monitor/node-exporter/monitoring.coreos.com_v1_PrometheusRule_node-exporter-kube-prometh-node.rules.yaml b/monitor/node-exporter/monitoring.coreos.com_v1_PrometheusRule_node-exporter-kube-prometh-node.rules.yaml deleted file mode 100644 index cf2dec4..0000000 --- a/monitor/node-exporter/monitoring.coreos.com_v1_PrometheusRule_node-exporter-kube-prometh-node.rules.yaml +++ /dev/null @@ -1,56 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/node.rules.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: node-exporter-kube-prometh-node.rules - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: node-exporter - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "node-exporter" - heritage: "Helm" -spec: - groups: - - name: node.rules - rules: - - expr: |- - topk by (cluster, namespace, pod) (1, - max by (cluster, node, namespace, pod) ( - label_replace(kube_pod_info{job="kube-state-metrics",node!=""}, "pod", "$1", "pod", "(.*)") - )) - record: 'node_namespace_pod:kube_pod_info:' - - expr: |- - count by (cluster, node) ( - node_cpu_seconds_total{mode="idle",job="node-exporter"} - * on (cluster, namespace, pod) group_left(node) - topk by (cluster, namespace, pod) (1, node_namespace_pod:kube_pod_info:) - ) - record: node:node_num_cpu:sum - - expr: |- - sum( - node_memory_MemAvailable_bytes{job="node-exporter"} or - ( - node_memory_Buffers_bytes{job="node-exporter"} + - node_memory_Cached_bytes{job="node-exporter"} + - node_memory_MemFree_bytes{job="node-exporter"} + - node_memory_Slab_bytes{job="node-exporter"} - ) - ) by (cluster) - record: :node_memory_MemAvailable_bytes:sum - - expr: |- - avg by (cluster, node) ( - sum without (mode) ( - rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal",job="node-exporter"}[5m]) - ) - ) - record: node:node_cpu_utilization:ratio_rate5m - - expr: |- - avg by (cluster) ( - node:node_cpu_utilization:ratio_rate5m - ) - record: cluster:node_cpu:ratio_rate5m \ No newline at end of file diff --git a/monitor/node-exporter/monitoring.coreos.com_v1_ServiceMonitor_node-exporter-prometheus-node-exporter.yaml b/monitor/node-exporter/monitoring.coreos.com_v1_ServiceMonitor_node-exporter-prometheus-node-exporter.yaml deleted file mode 100644 index 890da43..0000000 --- a/monitor/node-exporter/monitoring.coreos.com_v1_ServiceMonitor_node-exporter-prometheus-node-exporter.yaml +++ /dev/null @@ -1,28 +0,0 @@ -# Source: kube-prometheus-stack/charts/prometheus-node-exporter/templates/servicemonitor.yaml -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: node-exporter-prometheus-node-exporter - namespace: vynil-monitor - labels: - helm.sh/chart: prometheus-node-exporter-4.26.0 - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: metrics - app.kubernetes.io/part-of: prometheus-node-exporter - app.kubernetes.io/name: prometheus-node-exporter - app.kubernetes.io/instance: node-exporter - app.kubernetes.io/version: "1.7.0" - jobLabel: node-exporter - release: node-exporter -spec: - jobLabel: jobLabel - - selector: - matchLabels: - app.kubernetes.io/name: prometheus-node-exporter - app.kubernetes.io/instance: node-exporter - attachMetadata: - node: false - endpoints: - - port: http-metrics - scheme: http \ No newline at end of file diff --git a/monitor/node-exporter/v1_ConfigMap_node-exporter-kube-prometh-node-cluster-rsrc-use.yaml b/monitor/node-exporter/v1_ConfigMap_node-exporter-kube-prometh-node-cluster-rsrc-use.yaml deleted file mode 100644 index b111460..0000000 --- a/monitor/node-exporter/v1_ConfigMap_node-exporter-kube-prometh-node-cluster-rsrc-use.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/node-cluster-rsrc-use.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - namespace: vynil-monitor - name: node-exporter-kube-prometh-node-cluster-rsrc-use - annotations: - {} - labels: - grafana_dashboard: "1" - app: kube-prometheus-stack-grafana - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: node-exporter - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "node-exporter" - heritage: "Helm" -data: - node-cluster-rsrc-use.json: |- - {"__inputs":[],"__requires":[],"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":1,"hideControls":false,"id":null,"links":[],"refresh":"30s","rows":[{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"fillGradient":0,"gridPos":{},"id":2,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":false,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"((\n instance:node_cpu_utilisation:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}\n *\n instance:node_num_cpu:sum{job=\"node-exporter\", cluster=\"$cluster\"}\n) != 0 )\n/ scalar(sum(instance:node_num_cpu:sum{job=\"node-exporter\", cluster=\"$cluster\"}))\n","format":"time_series","intervalFactor":2,"legendFormat":"{{ instance }}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Utilisation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"fillGradient":0,"gridPos":{},"id":3,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":false,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(\n instance:node_load1_per_cpu:ratio{job=\"node-exporter\", cluster=\"$cluster\"}\n / scalar(count(instance:node_load1_per_cpu:ratio{job=\"node-exporter\", cluster=\"$cluster\"}))\n) != 0\n","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Saturation (Load1 per CPU)","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"fillGradient":0,"gridPos":{},"id":4,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":false,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(\n instance:node_memory_utilisation:ratio{job=\"node-exporter\", cluster=\"$cluster\"}\n / scalar(count(instance:node_memory_utilisation:ratio{job=\"node-exporter\", cluster=\"$cluster\"}))\n) != 0\n","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Utilisation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"fillGradient":0,"gridPos":{},"id":5,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":false,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"instance:node_vmstat_pgmajfault:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Saturation (Major Page Faults)","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"rds","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"rds","label":null,"logBase":1,"max":null,"min":null,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"fillGradient":0,"gridPos":{},"id":6,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":false,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[{"alias":"/Receive/","stack":"A"},{"alias":"/Transmit/","stack":"B","transform":"negative-Y"}],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"instance:node_network_receive_bytes_excluding_lo:rate5m{job=\"node-exporter\", cluster=\"$cluster\"} != 0","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} Receive","refId":"A"},{"expr":"instance:node_network_transmit_bytes_excluding_lo:rate5m{job=\"node-exporter\", cluster=\"$cluster\"} != 0","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} Transmit","refId":"B"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Network Utilisation (Bytes Receive/Transmit)","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"fillGradient":0,"gridPos":{},"id":7,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":false,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[{"alias":"/ Receive/","stack":"A"},{"alias":"/ Transmit/","stack":"B","transform":"negative-Y"}],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"instance:node_network_receive_drop_excluding_lo:rate5m{job=\"node-exporter\", cluster=\"$cluster\"} != 0","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} Receive","refId":"A"},{"expr":"instance:node_network_transmit_drop_excluding_lo:rate5m{job=\"node-exporter\", cluster=\"$cluster\"} != 0","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} Transmit","refId":"B"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Network Saturation (Drops Receive/Transmit)","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":null,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Network","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"fillGradient":0,"gridPos":{},"id":8,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":false,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(\n instance_device:node_disk_io_time_seconds:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}\n / scalar(count(instance_device:node_disk_io_time_seconds:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}))\n) != 0\n","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} {{device}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk IO Utilisation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"fillGradient":0,"gridPos":{},"id":9,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":false,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(\n instance_device:node_disk_io_time_weighted_seconds:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}\n / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}))\n) != 0\n","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} {{device}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk IO Saturation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Disk IO","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"fillGradient":0,"gridPos":{},"id":10,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":false,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum without (device) (\n max without (fstype, mountpoint) ((\n node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\", mountpoint!=\"\", cluster=\"$cluster\"}\n -\n node_filesystem_avail_bytes{job=\"node-exporter\", fstype!=\"\", mountpoint!=\"\", cluster=\"$cluster\"}\n ) != 0)\n)\n/ scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\", mountpoint!=\"\", cluster=\"$cluster\"})))\n","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk Space Utilisation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Disk Space","titleSize":"h6","type":"row"}],"schemaVersion":14,"style":"dark","tags":["node-exporter-mixin"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"","value":""},"datasource":"$datasource","hide":2,"includeAll":false,"label":null,"multi":false,"name":"cluster","options":[],"query":"label_values(node_time_seconds, cluster)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Node Exporter / USE Method / Cluster","version":0} \ No newline at end of file diff --git a/monitor/node-exporter/v1_ConfigMap_node-exporter-kube-prometh-node-rsrc-use.yaml b/monitor/node-exporter/v1_ConfigMap_node-exporter-kube-prometh-node-rsrc-use.yaml deleted file mode 100644 index 693f949..0000000 --- a/monitor/node-exporter/v1_ConfigMap_node-exporter-kube-prometh-node-rsrc-use.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/node-rsrc-use.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - namespace: vynil-monitor - name: node-exporter-kube-prometh-node-rsrc-use - annotations: - {} - labels: - grafana_dashboard: "1" - app: kube-prometheus-stack-grafana - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: node-exporter - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "node-exporter" - heritage: "Helm" -data: - node-rsrc-use.json: |- - {"__inputs":[],"__requires":[],"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":1,"hideControls":false,"id":null,"links":[],"refresh":"30s","rows":[{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"fillGradient":0,"gridPos":{},"id":2,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":false,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"instance:node_cpu_utilisation:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0","format":"time_series","intervalFactor":2,"legendFormat":"Utilisation","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Utilisation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"fillGradient":0,"gridPos":{},"id":3,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":false,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"instance:node_load1_per_cpu:ratio{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0","format":"time_series","intervalFactor":2,"legendFormat":"Saturation","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Saturation (Load1 per CPU)","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"fillGradient":0,"gridPos":{},"id":4,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":false,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"instance:node_memory_utilisation:ratio{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0","format":"time_series","intervalFactor":2,"legendFormat":"Utilisation","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Utilisation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"fillGradient":0,"gridPos":{},"id":5,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":false,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"instance:node_vmstat_pgmajfault:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0","format":"time_series","intervalFactor":2,"legendFormat":"Major page Faults","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Saturation (Major Page Faults)","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"rds","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"rds","label":null,"logBase":1,"max":null,"min":null,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"fillGradient":0,"gridPos":{},"id":6,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":false,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[{"alias":"/Receive/","stack":"A"},{"alias":"/Transmit/","stack":"B","transform":"negative-Y"}],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"instance:node_network_receive_bytes_excluding_lo:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0","format":"time_series","intervalFactor":2,"legendFormat":"Receive","refId":"A"},{"expr":"instance:node_network_transmit_bytes_excluding_lo:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0","format":"time_series","intervalFactor":2,"legendFormat":"Transmit","refId":"B"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Network Utilisation (Bytes Receive/Transmit)","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"fillGradient":0,"gridPos":{},"id":7,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":false,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[{"alias":"/ Receive/","stack":"A"},{"alias":"/ Transmit/","stack":"B","transform":"negative-Y"}],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"instance:node_network_receive_drop_excluding_lo:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0","format":"time_series","intervalFactor":2,"legendFormat":"Receive","refId":"A"},{"expr":"instance:node_network_transmit_drop_excluding_lo:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0","format":"time_series","intervalFactor":2,"legendFormat":"Transmit","refId":"B"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Network Saturation (Drops Receive/Transmit)","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":null,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Network","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"fillGradient":0,"gridPos":{},"id":8,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":false,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"instance_device:node_disk_io_time_seconds:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0","format":"time_series","intervalFactor":2,"legendFormat":"{{device}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk IO Utilisation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"fillGradient":0,"gridPos":{},"id":9,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":false,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"instance_device:node_disk_io_time_weighted_seconds:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0","format":"time_series","intervalFactor":2,"legendFormat":"{{device}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk IO Saturation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Disk IO","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"fillGradient":0,"gridPos":{},"id":10,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":false,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sort_desc(1 -\n (\n max without (mountpoint, fstype) (node_filesystem_avail_bytes{job=\"node-exporter\", fstype!=\"\", instance=\"$instance\", cluster=\"$cluster\"})\n /\n max without (mountpoint, fstype) (node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\", instance=\"$instance\", cluster=\"$cluster\"})\n ) != 0\n)\n","format":"time_series","intervalFactor":2,"legendFormat":"{{device}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk Space Utilisation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Disk Space","titleSize":"h6","type":"row"}],"schemaVersion":14,"style":"dark","tags":["node-exporter-mixin"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"","value":""},"datasource":"$datasource","hide":2,"includeAll":false,"label":null,"multi":false,"name":"cluster","options":[],"query":"label_values(node_time_seconds, cluster)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{},"datasource":"$datasource","hide":0,"includeAll":false,"label":null,"multi":false,"name":"instance","options":[],"query":"label_values(node_exporter_build_info{job=\"node-exporter\", cluster=\"$cluster\"}, instance)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Node Exporter / USE Method / Node","version":0} \ No newline at end of file diff --git a/monitor/node-exporter/v1_ConfigMap_node-exporter-kube-prometh-nodes-darwin.yaml b/monitor/node-exporter/v1_ConfigMap_node-exporter-kube-prometh-nodes-darwin.yaml deleted file mode 100644 index b7c5604..0000000 --- a/monitor/node-exporter/v1_ConfigMap_node-exporter-kube-prometh-nodes-darwin.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/nodes-darwin.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - namespace: vynil-monitor - name: node-exporter-kube-prometh-nodes-darwin - annotations: - {} - labels: - grafana_dashboard: "1" - app: kube-prometheus-stack-grafana - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: node-exporter - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "node-exporter" - heritage: "Helm" -data: - nodes-darwin.json: |- - {"__inputs":[],"__requires":[],"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":1,"hideControls":false,"id":null,"links":[],"refresh":"30s","rows":[{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":2,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(\n (1 - sum without (mode) (rate(node_cpu_seconds_total{job=\"node-exporter\", mode=~\"idle|iowait|steal\", instance=\"$instance\"}[$__rate_interval])))\n/ ignoring(cpu) group_left\n count without (cpu, mode) (node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"})\n)\n","format":"time_series","intervalFactor":5,"legendFormat":"{{cpu}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Usage","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":1,"min":0,"show":true},{"format":"percentunit","label":null,"logBase":1,"max":1,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":0,"fillGradient":0,"gridPos":{},"id":3,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"node_load1{job=\"node-exporter\", instance=\"$instance\"}","format":"time_series","intervalFactor":2,"legendFormat":"1m load average","refId":"A"},{"expr":"node_load5{job=\"node-exporter\", instance=\"$instance\"}","format":"time_series","intervalFactor":2,"legendFormat":"5m load average","refId":"B"},{"expr":"node_load15{job=\"node-exporter\", instance=\"$instance\"}","format":"time_series","intervalFactor":2,"legendFormat":"15m load average","refId":"C"},{"expr":"count(node_cpu_seconds_total{job=\"node-exporter\", instance=\"$instance\", mode=\"idle\"})","format":"time_series","intervalFactor":2,"legendFormat":"logical cores","refId":"D"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Load Average","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":4,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":9,"stack":false,"steppedLine":false,"targets":[{"expr":"node_memory_total_bytes{job=\"node-exporter\", instance=\"$instance\"}","format":"time_series","intervalFactor":2,"legendFormat":"Physical Memory","refId":"A"},{"expr":"(\n node_memory_internal_bytes{job=\"node-exporter\", instance=\"$instance\"} -\n node_memory_purgeable_bytes{job=\"node-exporter\", instance=\"$instance\"} +\n node_memory_wired_bytes{job=\"node-exporter\", instance=\"$instance\"} +\n node_memory_compressed_bytes{job=\"node-exporter\", instance=\"$instance\"}\n)\n","format":"time_series","intervalFactor":2,"legendFormat":"Memory Used","refId":"B"},{"expr":"(\n node_memory_internal_bytes{job=\"node-exporter\", instance=\"$instance\"} -\n node_memory_purgeable_bytes{job=\"node-exporter\", instance=\"$instance\"}\n)\n","format":"time_series","intervalFactor":2,"legendFormat":"App Memory","refId":"C"},{"expr":"node_memory_wired_bytes{job=\"node-exporter\", instance=\"$instance\"}","format":"time_series","intervalFactor":2,"legendFormat":"Wired Memory","refId":"D"},{"expr":"node_memory_compressed_bytes{job=\"node-exporter\", instance=\"$instance\"}","format":"time_series","intervalFactor":2,"legendFormat":"Compressed","refId":"E"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Usage","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"datasource":"$datasource","fieldConfig":{"defaults":{"max":100,"min":0,"thresholds":{"mode":"absolute","steps":[{"color":"rgba(50, 172, 45, 0.97)"},{"color":"rgba(237, 129, 40, 0.89)","value":80},{"color":"rgba(245, 54, 54, 0.9)","value":90}]},"unit":"percent"}},"gridPos":{},"id":5,"span":3,"targets":[{"expr":"(\n (\n avg(node_memory_internal_bytes{job=\"node-exporter\", instance=\"$instance\"}) -\n avg(node_memory_purgeable_bytes{job=\"node-exporter\", instance=\"$instance\"}) +\n avg(node_memory_wired_bytes{job=\"node-exporter\", instance=\"$instance\"}) +\n avg(node_memory_compressed_bytes{job=\"node-exporter\", instance=\"$instance\"})\n ) /\n avg(node_memory_total_bytes{job=\"node-exporter\", instance=\"$instance\"})\n)\n*\n100\n","format":"time_series","intervalFactor":2,"legendFormat":""}],"title":"Memory Usage","transparent":false,"type":"gauge"}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":0,"fillGradient":0,"gridPos":{},"id":6,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[{"alias":"/ read| written/","yaxis":1},{"alias":"/ io time/","yaxis":2}],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"rate(node_disk_read_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])","format":"time_series","intervalFactor":1,"legendFormat":"{{device}} read","refId":"A"},{"expr":"rate(node_disk_written_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])","format":"time_series","intervalFactor":1,"legendFormat":"{{device}} written","refId":"B"},{"expr":"rate(node_disk_io_time_seconds_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])","format":"time_series","intervalFactor":1,"legendFormat":"{{device}} io time","refId":"C"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk I/O","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{},"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"yellow","value":0.8},{"color":"red","value":0.9}]},"unit":"decbytes"},"overrides":[{"matcher":{"id":"byName","options":"Mounted on"},"properties":[{"id":"custom.width","value":260}]},{"matcher":{"id":"byName","options":"Size"},"properties":[{"id":"custom.width","value":93}]},{"matcher":{"id":"byName","options":"Used"},"properties":[{"id":"custom.width","value":72}]},{"matcher":{"id":"byName","options":"Available"},"properties":[{"id":"custom.width","value":88}]},{"matcher":{"id":"byName","options":"Used, %"},"properties":[{"id":"unit","value":"percentunit"},{"id":"custom.displayMode","value":"gradient-gauge"},{"id":"max","value":1},{"id":"min","value":0}]}]},"gridPos":{},"id":7,"span":6,"targets":[{"expr":"max by (mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\", mountpoint!=\"\"})\n","format":"table","instant":true,"intervalFactor":2,"legendFormat":""},{"expr":"max by (mountpoint) (node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\", mountpoint!=\"\"})\n","format":"table","instant":true,"intervalFactor":2,"legendFormat":""}],"title":"Disk Space Usage","transformations":[{"id":"groupBy","options":{"fields":{"Value #A":{"aggregations":["lastNotNull"],"operation":"aggregate"},"Value #B":{"aggregations":["lastNotNull"],"operation":"aggregate"},"mountpoint":{"aggregations":[],"operation":"groupby"}}}},{"id":"merge","options":{}},{"id":"calculateField","options":{"alias":"Used","binary":{"left":"Value #A (lastNotNull)","operator":"-","reducer":"sum","right":"Value #B (lastNotNull)"},"mode":"binary","reduce":{"reducer":"sum"}}},{"id":"calculateField","options":{"alias":"Used, %","binary":{"left":"Used","operator":"/","reducer":"sum","right":"Value #A (lastNotNull)"},"mode":"binary","reduce":{"reducer":"sum"}}},{"id":"organize","options":{"excludeByName":{},"indexByName":{},"renameByName":{"Value #A (lastNotNull)":"Size","Value #B (lastNotNull)":"Available","mountpoint":"Mounted on"}}},{"id":"sortBy","options":{"fields":{},"sort":[{"field":"Mounted on"}]}}],"transparent":false,"type":"table"}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Disk","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","description":"Network received (bits/s)","fill":0,"fillGradient":0,"gridPos":{},"id":8,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"rate(node_network_receive_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__rate_interval]) * 8","format":"time_series","intervalFactor":1,"legendFormat":"{{device}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Network Received","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","description":"Network transmitted (bits/s)","fill":0,"fillGradient":0,"gridPos":{},"id":9,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"rate(node_network_transmit_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__rate_interval]) * 8","format":"time_series","intervalFactor":1,"legendFormat":"{{device}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Network Transmitted","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Network","titleSize":"h6","type":"row"}],"schemaVersion":14,"style":"dark","tags":["node-exporter-mixin"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{},"datasource":"$datasource","hide":0,"includeAll":false,"label":"Instance","multi":false,"name":"instance","options":[],"query":"label_values(node_uname_info{job=\"node-exporter\", sysname=\"Darwin\"}, instance)","refresh":2,"regex":"","sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Node Exporter / MacOS","version":0} \ No newline at end of file diff --git a/monitor/node-exporter/v1_ConfigMap_node-exporter-kube-prometh-nodes.yaml b/monitor/node-exporter/v1_ConfigMap_node-exporter-kube-prometh-nodes.yaml deleted file mode 100644 index 12a60bd..0000000 --- a/monitor/node-exporter/v1_ConfigMap_node-exporter-kube-prometh-nodes.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/nodes.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - namespace: vynil-monitor - name: node-exporter-kube-prometh-nodes - annotations: - {} - labels: - grafana_dashboard: "1" - app: kube-prometheus-stack-grafana - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: node-exporter - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "node-exporter" - heritage: "Helm" -data: - nodes.json: |- - {"__inputs":[],"__requires":[],"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":1,"hideControls":false,"id":null,"links":[],"refresh":"30s","rows":[{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":2,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"(\n (1 - sum without (mode) (rate(node_cpu_seconds_total{job=\"node-exporter\", mode=~\"idle|iowait|steal\", instance=\"$instance\"}[$__rate_interval])))\n/ ignoring(cpu) group_left\n count without (cpu, mode) (node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"})\n)\n","format":"time_series","intervalFactor":5,"legendFormat":"{{cpu}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Usage","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":1,"min":0,"show":true},{"format":"percentunit","label":null,"logBase":1,"max":1,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":0,"fillGradient":0,"gridPos":{},"id":3,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"node_load1{job=\"node-exporter\", instance=\"$instance\"}","format":"time_series","intervalFactor":2,"legendFormat":"1m load average","refId":"A"},{"expr":"node_load5{job=\"node-exporter\", instance=\"$instance\"}","format":"time_series","intervalFactor":2,"legendFormat":"5m load average","refId":"B"},{"expr":"node_load15{job=\"node-exporter\", instance=\"$instance\"}","format":"time_series","intervalFactor":2,"legendFormat":"15m load average","refId":"C"},{"expr":"count(node_cpu_seconds_total{job=\"node-exporter\", instance=\"$instance\", mode=\"idle\"})","format":"time_series","intervalFactor":2,"legendFormat":"logical cores","refId":"D"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Load Average","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":4,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":9,"stack":true,"steppedLine":false,"targets":[{"expr":"(\n node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_MemFree_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_Buffers_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_Cached_bytes{job=\"node-exporter\", instance=\"$instance\"}\n)\n","format":"time_series","intervalFactor":2,"legendFormat":"memory used","refId":"A"},{"expr":"node_memory_Buffers_bytes{job=\"node-exporter\", instance=\"$instance\"}","format":"time_series","intervalFactor":2,"legendFormat":"memory buffers","refId":"B"},{"expr":"node_memory_Cached_bytes{job=\"node-exporter\", instance=\"$instance\"}","format":"time_series","intervalFactor":2,"legendFormat":"memory cached","refId":"C"},{"expr":"node_memory_MemFree_bytes{job=\"node-exporter\", instance=\"$instance\"}","format":"time_series","intervalFactor":2,"legendFormat":"memory free","refId":"D"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Usage","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"datasource":"$datasource","fieldConfig":{"defaults":{"max":100,"min":0,"thresholds":{"mode":"absolute","steps":[{"color":"rgba(50, 172, 45, 0.97)"},{"color":"rgba(237, 129, 40, 0.89)","value":80},{"color":"rgba(245, 54, 54, 0.9)","value":90}]},"unit":"percent"}},"gridPos":{},"id":5,"span":3,"targets":[{"expr":"100 -\n(\n avg(node_memory_MemAvailable_bytes{job=\"node-exporter\", instance=\"$instance\"}) /\n avg(node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"})\n* 100\n)\n","format":"time_series","intervalFactor":2,"legendFormat":""}],"title":"Memory Usage","transparent":false,"type":"gauge"}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":0,"fillGradient":0,"gridPos":{},"id":6,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[{"alias":"/ read| written/","yaxis":1},{"alias":"/ io time/","yaxis":2}],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"rate(node_disk_read_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])","format":"time_series","intervalFactor":1,"legendFormat":"{{device}} read","refId":"A"},{"expr":"rate(node_disk_written_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])","format":"time_series","intervalFactor":1,"legendFormat":"{{device}} written","refId":"B"},{"expr":"rate(node_disk_io_time_seconds_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])","format":"time_series","intervalFactor":1,"legendFormat":"{{device}} io time","refId":"C"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk I/O","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"percentunit","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{},"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"yellow","value":0.8},{"color":"red","value":0.9}]},"unit":"decbytes"},"overrides":[{"matcher":{"id":"byName","options":"Mounted on"},"properties":[{"id":"custom.width","value":260}]},{"matcher":{"id":"byName","options":"Size"},"properties":[{"id":"custom.width","value":93}]},{"matcher":{"id":"byName","options":"Used"},"properties":[{"id":"custom.width","value":72}]},{"matcher":{"id":"byName","options":"Available"},"properties":[{"id":"custom.width","value":88}]},{"matcher":{"id":"byName","options":"Used, %"},"properties":[{"id":"unit","value":"percentunit"},{"id":"custom.displayMode","value":"gradient-gauge"},{"id":"max","value":1},{"id":"min","value":0}]}]},"gridPos":{},"id":7,"span":6,"targets":[{"expr":"max by (mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\", mountpoint!=\"\"})\n","format":"table","instant":true,"intervalFactor":2,"legendFormat":""},{"expr":"max by (mountpoint) (node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\", mountpoint!=\"\"})\n","format":"table","instant":true,"intervalFactor":2,"legendFormat":""}],"title":"Disk Space Usage","transformations":[{"id":"groupBy","options":{"fields":{"Value #A":{"aggregations":["lastNotNull"],"operation":"aggregate"},"Value #B":{"aggregations":["lastNotNull"],"operation":"aggregate"},"mountpoint":{"aggregations":[],"operation":"groupby"}}}},{"id":"merge","options":{}},{"id":"calculateField","options":{"alias":"Used","binary":{"left":"Value #A (lastNotNull)","operator":"-","reducer":"sum","right":"Value #B (lastNotNull)"},"mode":"binary","reduce":{"reducer":"sum"}}},{"id":"calculateField","options":{"alias":"Used, %","binary":{"left":"Used","operator":"/","reducer":"sum","right":"Value #A (lastNotNull)"},"mode":"binary","reduce":{"reducer":"sum"}}},{"id":"organize","options":{"excludeByName":{},"indexByName":{},"renameByName":{"Value #A (lastNotNull)":"Size","Value #B (lastNotNull)":"Available","mountpoint":"Mounted on"}}},{"id":"sortBy","options":{"fields":{},"sort":[{"field":"Mounted on"}]}}],"transparent":false,"type":"table"}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Disk","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","description":"Network received (bits/s)","fill":0,"fillGradient":0,"gridPos":{},"id":8,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"rate(node_network_receive_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__rate_interval]) * 8","format":"time_series","intervalFactor":1,"legendFormat":"{{device}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Network Received","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","description":"Network transmitted (bits/s)","fill":0,"fillGradient":0,"gridPos":{},"id":9,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"rate(node_network_transmit_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__rate_interval]) * 8","format":"time_series","intervalFactor":1,"legendFormat":"{{device}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Network Transmitted","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"bps","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Network","titleSize":"h6","type":"row"}],"schemaVersion":14,"style":"dark","tags":["node-exporter-mixin"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{},"datasource":"$datasource","hide":0,"includeAll":false,"label":"Instance","multi":false,"name":"instance","options":[],"query":"label_values(node_uname_info{job=\"node-exporter\", sysname!=\"Darwin\"}, instance)","refresh":2,"regex":"","sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Node Exporter / Nodes","version":0} \ No newline at end of file diff --git a/monitor/node-exporter/v1_ServiceAccount_node-exporter-prometheus-node-exporter.yaml b/monitor/node-exporter/v1_ServiceAccount_node-exporter-prometheus-node-exporter.yaml deleted file mode 100644 index 2bb87ea..0000000 --- a/monitor/node-exporter/v1_ServiceAccount_node-exporter-prometheus-node-exporter.yaml +++ /dev/null @@ -1,17 +0,0 @@ ---- -# Source: kube-prometheus-stack/charts/prometheus-node-exporter/templates/serviceaccount.yaml -apiVersion: v1 -kind: ServiceAccount -metadata: - name: node-exporter-prometheus-node-exporter - namespace: vynil-monitor - labels: - helm.sh/chart: prometheus-node-exporter-4.26.0 - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: metrics - app.kubernetes.io/part-of: prometheus-node-exporter - app.kubernetes.io/name: prometheus-node-exporter - app.kubernetes.io/instance: node-exporter - app.kubernetes.io/version: "1.7.0" - jobLabel: node-exporter - release: node-exporter \ No newline at end of file diff --git a/monitor/node-exporter/v1_Service_node-exporter-prometheus-node-exporter.yaml b/monitor/node-exporter/v1_Service_node-exporter-prometheus-node-exporter.yaml deleted file mode 100644 index e06580c..0000000 --- a/monitor/node-exporter/v1_Service_node-exporter-prometheus-node-exporter.yaml +++ /dev/null @@ -1,28 +0,0 @@ -# Source: kube-prometheus-stack/charts/prometheus-node-exporter/templates/service.yaml -apiVersion: v1 -kind: Service -metadata: - name: node-exporter-prometheus-node-exporter - namespace: vynil-monitor - labels: - helm.sh/chart: prometheus-node-exporter-4.26.0 - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: metrics - app.kubernetes.io/part-of: prometheus-node-exporter - app.kubernetes.io/name: prometheus-node-exporter - app.kubernetes.io/instance: node-exporter - app.kubernetes.io/version: "1.7.0" - jobLabel: node-exporter - release: node-exporter - annotations: - prometheus.io/scrape: "true" -spec: - type: ClusterIP - ports: - - port: 9100 - targetPort: 9100 - protocol: TCP - name: http-metrics - selector: - app.kubernetes.io/name: prometheus-node-exporter - app.kubernetes.io/instance: node-exporter \ No newline at end of file diff --git a/monitor/prometheus/datas.tf b/monitor/prometheus/datas.tf deleted file mode 100644 index 0bb6ac9..0000000 --- a/monitor/prometheus/datas.tf +++ /dev/null @@ -1,212 +0,0 @@ -locals { - authentik_url = "http://authentik.${var.domain}-auth.svc" - authentik_token = data.kubernetes_secret_v1.authentik.data["AUTHENTIK_BOOTSTRAP_TOKEN"] - common-labels = { - "vynil.solidite.fr/owner-name" = var.instance - "vynil.solidite.fr/owner-namespace" = var.namespace - "vynil.solidite.fr/owner-category" = var.category - "vynil.solidite.fr/owner-component" = var.component - "app.kubernetes.io/managed-by" = "vynil" - "app.kubernetes.io/instance" = var.instance - } - rb-patch = <<-EOF - - op: replace - path: /subjects/0/namespace - value: "${var.namespace}" - EOF -} - -data "kubernetes_secret_v1" "authentik" { - metadata { - name = "authentik" - namespace = "${var.domain}-auth" - } -} - -data "kustomization_overlay" "data" { - common_labels = local.common-labels - namespace = var.namespace - resources = [for file in fileset(path.module, "*.yaml"): file if file != "index.yaml" && length(regexall("ClusterRole",file))<1 && length(regexall("Service_prometheus",file))<1] - patches { - target { - kind = "ConfigMap" - name = "prometheus-kube-prometheus-grafana-datasource" - } - patch = <<-EOF - apiVersion: v1 - kind: ConfigMap - metadata: - name: prometheus-kube-prometheus-grafana-datasource - data: - datasource.yaml: |- - apiVersion: 1 - datasources: - - name: Prometheus - type: prometheus - uid: prometheus - url: http://${var.component}-${var.instance}.${var.namespace}:9090/ - access: proxy - isDefault: false - jsonData: - httpMethod: POST - timeInterval: 30s - EOF - } - patches { - target { - kind = "ServiceMonitor" - name = "prometheus-kube-prometheus-prometheus" - } - patch = <<-EOF - - op: replace - path: /spec/namespaceSelector/matchNames/0 - value: "${var.namespace}" - EOF - } - - patches { - target { - kind = "PrometheusRule" - name = "prometheus-kube-prometheus-prometheus" - } - patch = <<-EOF - apiVersion: monitoring.coreos.com/v1 - kind: PrometheusRule - metadata: - name: prometheus-kube-prometheus-prometheus - spec: - groups: - - name: prometheus - rules: - - alert: PrometheusBadConfig - expr: |- - # Without max_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - max_over_time(prometheus_config_last_reload_successful{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) == 0 - - alert: PrometheusSDRefreshFailure - expr: increase(prometheus_sd_refresh_failures_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[10m]) > 0 - - alert: PrometheusNotificationQueueRunningFull - expr: |- - # Without min_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - ( - predict_linear(prometheus_notifications_queue_length{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m], 60 * 30) - > - min_over_time(prometheus_notifications_queue_capacity{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) - ) - - alert: PrometheusErrorSendingAlertsToSomeAlertmanagers - expr: |- - ( - rate(prometheus_notifications_errors_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) - / - rate(prometheus_notifications_sent_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) - ) - * 100 - > 1 - - alert: PrometheusNotConnectedToAlertmanagers - expr: |- - # Without max_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - max_over_time(prometheus_notifications_alertmanagers_discovered{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) < 1 - - alert: PrometheusTSDBReloadsFailing - expr: increase(prometheus_tsdb_reloads_failures_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[3h]) > 0 - - alert: PrometheusTSDBCompactionsFailing - expr: increase(prometheus_tsdb_compactions_failed_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[3h]) > 0 - - alert: PrometheusNotIngestingSamples - expr: |- - ( - rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) <= 0 - and - ( - sum without(scrape_job) (prometheus_target_metadata_cache_entries{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}) > 0 - or - sum without(rule_group) (prometheus_rule_group_rules{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}) > 0 - ) - ) - - alert: PrometheusDuplicateTimestamps - expr: rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) > 0 - - alert: PrometheusOutOfOrderTimestamps - expr: rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) > 0 - - alert: PrometheusRemoteStorageFailures - expr: |- - ( - (rate(prometheus_remote_storage_failed_samples_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m])) - / - ( - (rate(prometheus_remote_storage_failed_samples_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m])) - + - (rate(prometheus_remote_storage_succeeded_samples_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) or rate(prometheus_remote_storage_samples_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m])) - ) - ) - * 100 - > 1 - - alert: PrometheusRemoteWriteBehind - expr: |- - # Without max_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - ( - max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) - - ignoring(remote_name, url) group_right - max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) - ) - > 120 - - alert: PrometheusRemoteWriteDesiredShards - expr: |- - # Without max_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - ( - max_over_time(prometheus_remote_storage_shards_desired{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) - > - max_over_time(prometheus_remote_storage_shards_max{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) - ) - - alert: PrometheusRuleFailures - expr: increase(prometheus_rule_evaluation_failures_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) > 0 - - alert: PrometheusMissingRuleEvaluations - expr: increase(prometheus_rule_group_iterations_missed_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) > 0 - - alert: PrometheusTargetLimitHit - expr: increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) > 0 - - alert: PrometheusLabelLimitHit - expr: increase(prometheus_target_scrape_pool_exceeded_label_limits_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) > 0 - - alert: PrometheusScrapeBodySizeLimitHit - expr: increase(prometheus_target_scrapes_exceeded_body_size_limit_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) > 0 - - alert: PrometheusScrapeSampleLimitHit - expr: increase(prometheus_target_scrapes_exceeded_sample_limit_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) > 0 - - alert: PrometheusTargetSyncFailure - expr: increase(prometheus_target_sync_failed_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[30m]) > 0 - - alert: PrometheusHighQueryLoad - expr: avg_over_time(prometheus_engine_queries{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) / max_over_time(prometheus_engine_queries_concurrent_max{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}"}[5m]) > 0.8 - - alert: PrometheusErrorSendingAlertsToAnyAlertmanager - expr: |- - min without (alertmanager) ( - rate(prometheus_notifications_errors_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}",alertmanager!~``}[5m]) - / - rate(prometheus_notifications_sent_total{job="prometheus-kube-prometheus-prometheus",namespace="${var.namespace}",alertmanager!~``}[5m]) - ) - * 100 - > 3 - EOF - } - patches { - target { - kind = "ServiceMonitor" - name = "prometheus-community-prometheus-node-exporter" - } - patch = <<-EOF - - op: replace - path: /spec/selector/matchLabels/app.kubernetes.io~1instance - value: "${var.instance}" - EOF - } -} - -data "kustomization_overlay" "data_no_ns" { - resources = [for file in fileset(path.module, "*.yaml"): file if (length(regexall("ClusterRole",file))>0 || length(regexall("Service_prometheus",file))>0)] - - patches { - target { - kind = "ClusterRoleBinding" - name = "prometheus-kube-prometheus-prometheus" - } - patch = local.rb-patch - } -} diff --git a/monitor/prometheus/index.yaml b/monitor/prometheus/index.yaml deleted file mode 100644 index a1b3c99..0000000 --- a/monitor/prometheus/index.yaml +++ /dev/null @@ -1,125 +0,0 @@ ---- -apiVersion: vinyl.solidite.fr/v1beta1 -kind: Component -category: monitor -metadata: - name: prometheus - description: null -options: - domain_name: - default: your_company.com - examples: - - your_company.com - type: string - domain: - default: your-company - examples: - - your-company - type: string - replicas: - default: 1 - examples: - - 1 - type: integer - issuer: - default: letsencrypt-prod - examples: - - letsencrypt-prod - type: string - images: - default: - prometheus: - pullPolicy: IfNotPresent - registry: quay.io - repository: prometheus/prometheus - tag: v2.49.1 - examples: - - prometheus: - pullPolicy: IfNotPresent - registry: quay.io - repository: prometheus/prometheus - tag: v2.49.1 - properties: - prometheus: - default: - pullPolicy: IfNotPresent - registry: quay.io - repository: prometheus/prometheus - tag: v2.49.1 - properties: - pullPolicy: - default: IfNotPresent - enum: - - Always - - Never - - IfNotPresent - type: string - registry: - default: quay.io - type: string - repository: - default: prometheus/prometheus - type: string - tag: - default: v2.49.1 - type: string - type: object - type: object - sub_domain: - default: prometheus - examples: - - prometheus - type: string - ingress_class: - default: traefik - examples: - - traefik - type: string - logLevel: - default: info - examples: - - info - type: string - app_group: - default: monitor - examples: - - monitor - type: string - listenLocal: - default: false - examples: - - false - type: boolean - enableAdminAPI: - default: false - examples: - - false - type: boolean - shards: - default: 1 - examples: - - 1 - type: integer - retention: - default: 10d - examples: - - 10d - type: string - alertmanager: - default: alertmanager-alertmanager - examples: - - alertmanager-alertmanager - type: string -dependencies: -- dist: null - category: share - component: authentik-forward -providers: - kubernetes: true - authentik: true - kubectl: true - postgresql: null - restapi: true - http: true - gitea: null -tfaddtype: null diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-kube-prometheus-kubelet.rules.yaml b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-kube-prometheus-kubelet.rules.yaml deleted file mode 100644 index 80fc52e..0000000 --- a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-kube-prometheus-kubelet.rules.yaml +++ /dev/null @@ -1,32 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubelet.rules.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: prometheus-kube-prometheus-kubelet.rules - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus" - heritage: "Helm" -spec: - groups: - - name: kubelet.rules - rules: - - expr: histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (cluster, instance, le) * on (cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) - labels: - quantile: '0.99' - record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile - - expr: histogram_quantile(0.9, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (cluster, instance, le) * on (cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) - labels: - quantile: '0.9' - record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile - - expr: histogram_quantile(0.5, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (cluster, instance, le) * on (cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) - labels: - quantile: '0.5' - record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile \ No newline at end of file diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-kube-prometheus-kubernetes-system-kube-proxy.yaml b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-kube-prometheus-kubernetes-system-kube-proxy.yaml deleted file mode 100644 index 1b06752..0000000 --- a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-kube-prometheus-kubernetes-system-kube-proxy.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-kube-proxy.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: prometheus-kube-prometheus-kubernetes-system-kube-proxy - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus" - heritage: "Helm" -spec: - groups: - - name: kubernetes-system-kube-proxy - rules: - - alert: KubeProxyDown - annotations: - description: KubeProxy has disappeared from Prometheus target discovery. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeproxydown - summary: Target disappeared from Prometheus target discovery. - expr: absent(up{job="kube-proxy"} == 1) - for: 15m - labels: - severity: critical \ No newline at end of file diff --git a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-kube-prometheus-prometheus.yaml b/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-kube-prometheus-prometheus.yaml deleted file mode 100644 index c2cc6eb..0000000 --- a/monitor/prometheus/monitoring.coreos.com_v1_PrometheusRule_prometheus-kube-prometheus-prometheus.yaml +++ /dev/null @@ -1,280 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/rules-1.14/prometheus.yaml -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: prometheus-kube-prometheus-prometheus - namespace: vynil-monitor - labels: - app: kube-prometheus-stack - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus" - heritage: "Helm" -spec: - groups: - - name: prometheus - rules: - - alert: PrometheusBadConfig - annotations: - description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to reload its configuration. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusbadconfig - summary: Failed Prometheus configuration reload. - expr: |- - # Without max_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - max_over_time(prometheus_config_last_reload_successful{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) == 0 - for: 10m - labels: - severity: critical - - alert: PrometheusSDRefreshFailure - annotations: - description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to refresh SD with mechanism {{$labels.mechanism}}. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheussdrefreshfailure - summary: Failed Prometheus SD refresh. - expr: increase(prometheus_sd_refresh_failures_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[10m]) > 0 - for: 20m - labels: - severity: warning - - alert: PrometheusNotificationQueueRunningFull - annotations: - description: Alert notification queue of Prometheus {{$labels.namespace}}/{{$labels.pod}} is running full. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusnotificationqueuerunningfull - summary: Prometheus alert notification queue predicted to run full in less than 30m. - expr: |- - # Without min_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - ( - predict_linear(prometheus_notifications_queue_length{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m], 60 * 30) - > - min_over_time(prometheus_notifications_queue_capacity{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) - ) - for: 15m - labels: - severity: warning - - alert: PrometheusErrorSendingAlertsToSomeAlertmanagers - annotations: - description: '{{ printf "%.1f" $value }}% errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to Alertmanager {{$labels.alertmanager}}.' - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuserrorsendingalertstosomealertmanagers - summary: Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager. - expr: |- - ( - rate(prometheus_notifications_errors_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) - / - rate(prometheus_notifications_sent_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) - ) - * 100 - > 1 - for: 15m - labels: - severity: warning - - alert: PrometheusNotConnectedToAlertmanagers - annotations: - description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not connected to any Alertmanagers. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusnotconnectedtoalertmanagers - summary: Prometheus is not connected to any Alertmanagers. - expr: |- - # Without max_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - max_over_time(prometheus_notifications_alertmanagers_discovered{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) < 1 - for: 10m - labels: - severity: warning - - alert: PrometheusTSDBReloadsFailing - annotations: - description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected {{$value | humanize}} reload failures over the last 3h. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustsdbreloadsfailing - summary: Prometheus has issues reloading blocks from disk. - expr: increase(prometheus_tsdb_reloads_failures_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[3h]) > 0 - for: 4h - labels: - severity: warning - - alert: PrometheusTSDBCompactionsFailing - annotations: - description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected {{$value | humanize}} compaction failures over the last 3h. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustsdbcompactionsfailing - summary: Prometheus has issues compacting blocks. - expr: increase(prometheus_tsdb_compactions_failed_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[3h]) > 0 - for: 4h - labels: - severity: warning - - alert: PrometheusNotIngestingSamples - annotations: - description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not ingesting samples. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusnotingestingsamples - summary: Prometheus is not ingesting samples. - expr: |- - ( - rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) <= 0 - and - ( - sum without(scrape_job) (prometheus_target_metadata_cache_entries{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}) > 0 - or - sum without(rule_group) (prometheus_rule_group_rules{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}) > 0 - ) - ) - for: 10m - labels: - severity: warning - - alert: PrometheusDuplicateTimestamps - annotations: - description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping {{ printf "%.4g" $value }} samples/s with different values but duplicated timestamp. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusduplicatetimestamps - summary: Prometheus is dropping samples with duplicate timestamps. - expr: rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) > 0 - for: 10m - labels: - severity: warning - - alert: PrometheusOutOfOrderTimestamps - annotations: - description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping {{ printf "%.4g" $value }} samples/s with timestamps arriving out of order. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusoutofordertimestamps - summary: Prometheus drops samples with out-of-order timestamps. - expr: rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) > 0 - for: 10m - labels: - severity: warning - - alert: PrometheusRemoteStorageFailures - annotations: - description: Prometheus {{$labels.namespace}}/{{$labels.pod}} failed to send {{ printf "%.1f" $value }}% of the samples to {{ $labels.remote_name}}:{{ $labels.url }} - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotestoragefailures - summary: Prometheus fails to send samples to remote storage. - expr: |- - ( - (rate(prometheus_remote_storage_failed_samples_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m])) - / - ( - (rate(prometheus_remote_storage_failed_samples_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m])) - + - (rate(prometheus_remote_storage_succeeded_samples_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) or rate(prometheus_remote_storage_samples_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m])) - ) - ) - * 100 - > 1 - for: 15m - labels: - severity: critical - - alert: PrometheusRemoteWriteBehind - annotations: - description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write is {{ printf "%.1f" $value }}s behind for {{ $labels.remote_name}}:{{ $labels.url }}. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotewritebehind - summary: Prometheus remote write is behind. - expr: |- - # Without max_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - ( - max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) - - ignoring(remote_name, url) group_right - max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) - ) - > 120 - for: 15m - labels: - severity: critical - - alert: PrometheusRemoteWriteDesiredShards - annotations: - description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write desired shards calculation wants to run {{ $value }} shards for queue {{ $labels.remote_name}}:{{ $labels.url }}, which is more than the max of {{ printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}` $labels.instance | query | first | value }}. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotewritedesiredshards - summary: Prometheus remote write desired shards calculation wants to run more than configured max shards. - expr: |- - # Without max_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - ( - max_over_time(prometheus_remote_storage_shards_desired{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) - > - max_over_time(prometheus_remote_storage_shards_max{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) - ) - for: 15m - labels: - severity: warning - - alert: PrometheusRuleFailures - annotations: - description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to evaluate {{ printf "%.0f" $value }} rules in the last 5m. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusrulefailures - summary: Prometheus is failing rule evaluations. - expr: increase(prometheus_rule_evaluation_failures_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) > 0 - for: 15m - labels: - severity: critical - - alert: PrometheusMissingRuleEvaluations - annotations: - description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has missed {{ printf "%.0f" $value }} rule group evaluations in the last 5m. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusmissingruleevaluations - summary: Prometheus is missing rule evaluations due to slow rule group evaluation. - expr: increase(prometheus_rule_group_iterations_missed_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) > 0 - for: 15m - labels: - severity: warning - - alert: PrometheusTargetLimitHit - annotations: - description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped {{ printf "%.0f" $value }} targets because the number of targets exceeded the configured target_limit. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustargetlimithit - summary: Prometheus has dropped targets because some scrape configs have exceeded the targets limit. - expr: increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) > 0 - for: 15m - labels: - severity: warning - - alert: PrometheusLabelLimitHit - annotations: - description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped {{ printf "%.0f" $value }} targets because some samples exceeded the configured label_limit, label_name_length_limit or label_value_length_limit. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuslabellimithit - summary: Prometheus has dropped targets because some scrape configs have exceeded the labels limit. - expr: increase(prometheus_target_scrape_pool_exceeded_label_limits_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) > 0 - for: 15m - labels: - severity: warning - - alert: PrometheusScrapeBodySizeLimitHit - annotations: - description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed {{ printf "%.0f" $value }} scrapes in the last 5m because some targets exceeded the configured body_size_limit. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusscrapebodysizelimithit - summary: Prometheus has dropped some targets that exceeded body size limit. - expr: increase(prometheus_target_scrapes_exceeded_body_size_limit_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) > 0 - for: 15m - labels: - severity: warning - - alert: PrometheusScrapeSampleLimitHit - annotations: - description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed {{ printf "%.0f" $value }} scrapes in the last 5m because some targets exceeded the configured sample_limit. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusscrapesamplelimithit - summary: Prometheus has failed scrapes that have exceeded the configured sample limit. - expr: increase(prometheus_target_scrapes_exceeded_sample_limit_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) > 0 - for: 15m - labels: - severity: warning - - alert: PrometheusTargetSyncFailure - annotations: - description: '{{ printf "%.0f" $value }} targets in Prometheus {{$labels.namespace}}/{{$labels.pod}} have failed to sync because invalid configuration was supplied.' - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustargetsyncfailure - summary: Prometheus has failed to sync targets. - expr: increase(prometheus_target_sync_failed_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[30m]) > 0 - for: 5m - labels: - severity: critical - - alert: PrometheusHighQueryLoad - annotations: - description: Prometheus {{$labels.namespace}}/{{$labels.pod}} query API has less than 20% available capacity in its query engine for the last 15 minutes. - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheushighqueryload - summary: Prometheus is reaching its maximum capacity serving concurrent requests. - expr: avg_over_time(prometheus_engine_queries{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) / max_over_time(prometheus_engine_queries_concurrent_max{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor"}[5m]) > 0.8 - for: 15m - labels: - severity: warning - - alert: PrometheusErrorSendingAlertsToAnyAlertmanager - annotations: - description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.' - runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuserrorsendingalertstoanyalertmanager - summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager. - expr: |- - min without (alertmanager) ( - rate(prometheus_notifications_errors_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor",alertmanager!~``}[5m]) - / - rate(prometheus_notifications_sent_total{job="prometheus-kube-prometheus-prometheus",namespace="vynil-monitor",alertmanager!~``}[5m]) - ) - * 100 - > 3 - for: 15m - labels: - severity: critical \ No newline at end of file diff --git a/monitor/prometheus/monitoring.coreos.com_v1_ServiceMonitor_prometheus-kube-prometheus-coredns.yaml b/monitor/prometheus/monitoring.coreos.com_v1_ServiceMonitor_prometheus-kube-prometheus-coredns.yaml deleted file mode 100644 index d27733f..0000000 --- a/monitor/prometheus/monitoring.coreos.com_v1_ServiceMonitor_prometheus-kube-prometheus-coredns.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# Source: kube-prometheus-stack/templates/exporters/core-dns/servicemonitor.yaml -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: prometheus-kube-prometheus-coredns - namespace: vynil-monitor - labels: - app: kube-prometheus-stack-coredns - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus" - heritage: "Helm" -spec: - jobLabel: jobLabel - - selector: - matchLabels: - app: kube-prometheus-stack-coredns - release: "prometheus" - namespaceSelector: - matchNames: - - "kube-system" - endpoints: - - port: http-metrics - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token \ No newline at end of file diff --git a/monitor/prometheus/monitoring.coreos.com_v1_ServiceMonitor_prometheus-kube-prometheus-kube-proxy.yaml b/monitor/prometheus/monitoring.coreos.com_v1_ServiceMonitor_prometheus-kube-prometheus-kube-proxy.yaml deleted file mode 100644 index d8bbb5a..0000000 --- a/monitor/prometheus/monitoring.coreos.com_v1_ServiceMonitor_prometheus-kube-prometheus-kube-proxy.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# Source: kube-prometheus-stack/templates/exporters/kube-proxy/servicemonitor.yaml -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: prometheus-kube-prometheus-kube-proxy - namespace: vynil-monitor - labels: - app: kube-prometheus-stack-kube-proxy - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus" - heritage: "Helm" -spec: - jobLabel: jobLabel - - selector: - matchLabels: - app: kube-prometheus-stack-kube-proxy - release: "prometheus" - namespaceSelector: - matchNames: - - "kube-system" - endpoints: - - port: http-metrics - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token \ No newline at end of file diff --git a/monitor/prometheus/monitoring.coreos.com_v1_ServiceMonitor_prometheus-kube-prometheus-kubelet.yaml b/monitor/prometheus/monitoring.coreos.com_v1_ServiceMonitor_prometheus-kube-prometheus-kubelet.yaml deleted file mode 100644 index 653325c..0000000 --- a/monitor/prometheus/monitoring.coreos.com_v1_ServiceMonitor_prometheus-kube-prometheus-kubelet.yaml +++ /dev/null @@ -1,95 +0,0 @@ -# Source: kube-prometheus-stack/templates/exporters/kubelet/servicemonitor.yaml -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: prometheus-kube-prometheus-kubelet - namespace: vynil-monitor - labels: - app: kube-prometheus-stack-kubelet - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus" - heritage: "Helm" -spec: - - attachMetadata: - node: false - endpoints: - - port: https-metrics - scheme: https - tlsConfig: - caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - insecureSkipVerify: true - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - honorLabels: true - honorTimestamps: true - relabelings: - - action: replace - sourceLabels: - - __metrics_path__ - targetLabel: metrics_path - - port: https-metrics - scheme: https - path: /metrics/cadvisor - honorLabels: true - honorTimestamps: true - tlsConfig: - caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - insecureSkipVerify: true - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - metricRelabelings: - - action: drop - regex: container_cpu_(cfs_throttled_seconds_total|load_average_10s|system_seconds_total|user_seconds_total) - sourceLabels: - - __name__ - - action: drop - regex: container_fs_(io_current|io_time_seconds_total|io_time_weighted_seconds_total|reads_merged_total|sector_reads_total|sector_writes_total|writes_merged_total) - sourceLabels: - - __name__ - - action: drop - regex: container_memory_(mapped_file|swap) - sourceLabels: - - __name__ - - action: drop - regex: container_(file_descriptors|tasks_state|threads_max) - sourceLabels: - - __name__ - - action: drop - regex: container_spec.* - sourceLabels: - - __name__ - - action: drop - regex: .+; - sourceLabels: - - id - - pod - relabelings: - - action: replace - sourceLabels: - - __metrics_path__ - targetLabel: metrics_path - - port: https-metrics - scheme: https - path: /metrics/probes - honorLabels: true - honorTimestamps: true - tlsConfig: - caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - insecureSkipVerify: true - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - relabelings: - - action: replace - sourceLabels: - - __metrics_path__ - targetLabel: metrics_path - jobLabel: k8s-app - namespaceSelector: - matchNames: - - kube-system - selector: - matchLabels: - app.kubernetes.io/name: kubelet - k8s-app: kubelet \ No newline at end of file diff --git a/monitor/prometheus/monitoring.coreos.com_v1_ServiceMonitor_prometheus-kube-prometheus-prometheus.yaml b/monitor/prometheus/monitoring.coreos.com_v1_ServiceMonitor_prometheus-kube-prometheus-prometheus.yaml deleted file mode 100644 index d527bb0..0000000 --- a/monitor/prometheus/monitoring.coreos.com_v1_ServiceMonitor_prometheus-kube-prometheus-prometheus.yaml +++ /dev/null @@ -1,32 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/servicemonitor.yaml -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: prometheus-kube-prometheus-prometheus - namespace: vynil-monitor - labels: - app: kube-prometheus-stack-prometheus - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus" - heritage: "Helm" -spec: - - selector: - matchLabels: - app: kube-prometheus-stack-prometheus - release: "prometheus" - self-monitor: "true" - namespaceSelector: - matchNames: - - "vynil-monitor" - endpoints: - - port: http-web - path: "/metrics" - - port: reloader-web - scheme: http - path: "/metrics" \ No newline at end of file diff --git a/monitor/prometheus/presentation.tf b/monitor/prometheus/presentation.tf deleted file mode 100644 index 2f42d6e..0000000 --- a/monitor/prometheus/presentation.tf +++ /dev/null @@ -1,75 +0,0 @@ -locals { - dns_name = "${var.sub_domain}.${var.domain_name}" - dns_names = [local.dns_name] - app_name = var.component == var.instance ? var.instance : format("%s-%s", var.component, var.instance) - icon = "favicon.ico" - request_headers = { - "Content-Type" = "application/json" - Authorization = "Bearer ${data.kubernetes_secret_v1.authentik.data["AUTHENTIK_BOOTSTRAP_TOKEN"]}" - } - service = { - "name" = "prometheus-operated" - "port" = { - "number" = 9090 - } - } -} - -module "ingress" { - source = "git::https://git.solidite.fr/vynil/kydah-modules.git//ingress" - component = "" - instance = var.instance - namespace = var.namespace - issuer = var.issuer - ingress_class = var.ingress_class - labels = local.common-labels - dns_names = local.dns_names - middlewares = [module.forward.middleware] - services = [local.service] - providers = { - kubectl = kubectl - } -} - -module "application" { - source = "git::https://git.solidite.fr/vynil/kydah-modules.git//application" - component = var.component - instance = var.instance - app_group = var.app_group - dns_name = local.dns_name - icon = local.icon - protocol_provider = module.forward.provider-id - providers = { - authentik = authentik - } -} - -provider "restapi" { - uri = "http://authentik.${var.domain}-auth.svc/api/v3/" - headers = local.request_headers - create_method = "PATCH" - update_method = "PATCH" - destroy_method = "PATCH" - write_returns_object = true - id_attribute = "name" -} - -module "forward" { - source = "git::https://git.solidite.fr/vynil/kydah-modules.git//forward" - component = var.component - instance = var.instance - domain = var.domain - namespace = var.namespace - ingress_class = var.ingress_class - labels = local.common-labels - dns_names = local.dns_names - service = local.service - icon = local.icon - request_headers = local.request_headers - providers = { - restapi = restapi - http = http - kubectl = kubectl - authentik = authentik - } -} diff --git a/monitor/prometheus/prometheus.tf b/monitor/prometheus/prometheus.tf deleted file mode 100644 index 2e1ecdb..0000000 --- a/monitor/prometheus/prometheus.tf +++ /dev/null @@ -1,62 +0,0 @@ -resource "kubectl_manifest" "prometheus" { - yaml_body = <<-EOF - apiVersion: monitoring.coreos.com/v1 - kind: Prometheus - metadata: - name: prometheus - namespace: "${var.namespace}" - labels: ${jsonencode(local.common-labels)} - spec: - image: "${var.images.prometheus.registry}/${var.images.prometheus.repository}:${var.images.prometheus.tag}" - version: ${var.images.prometheus.tag} - externalUrl: http://${var.component}-${var.instance}.${var.namespace}:9090 - replicas: ${var.replicas} - shards: ${var.shards} - logLevel: ${var.logLevel} - listenLocal: ${var.listenLocal} - enableAdminAPI: ${var.enableAdminAPI} - retention: "${var.retention}" - tsdb: - outOfOrderTimeWindow: 0s - walCompression: true - routePrefix: "/" - alerting: - alertmanagers: - - namespace: ${var.namespace} - name: ${var.alertmanager} - port: http-web - pathPrefix: "/" - apiVersion: v2 - scrapeConfigNamespaceSelector: {} - scrapeConfigSelector: - matchLabels: - app.kubernetes.io/managed-by: vynil - serviceMonitorNamespaceSelector: {} - serviceMonitorSelector: - matchLabels: - app.kubernetes.io/managed-by: vynil - podMonitorNamespaceSelector: {} - podMonitorSelector: - matchLabels: - app.kubernetes.io/managed-by: vynil - probeNamespaceSelector: {} - probeSelector: - matchLabels: - app.kubernetes.io/managed-by: vynil - ruleNamespaceSelector: {} - ruleSelector: - matchLabels: - app.kubernetes.io/managed-by: vynil - serviceAccountName: prometheus-kube-prometheus-prometheus - securityContext: - fsGroup: 2000 - runAsGroup: 2000 - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - - portName: http-web - hostNetwork: false - EOF -} diff --git a/monitor/prometheus/rbac.authorization.k8s.io_v1_ClusterRoleBinding_prometheus-kube-prometheus-prometheus.yaml b/monitor/prometheus/rbac.authorization.k8s.io_v1_ClusterRoleBinding_prometheus-kube-prometheus-prometheus.yaml deleted file mode 100644 index 0cb2b23..0000000 --- a/monitor/prometheus/rbac.authorization.k8s.io_v1_ClusterRoleBinding_prometheus-kube-prometheus-prometheus.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/clusterrolebinding.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: prometheus-kube-prometheus-prometheus - labels: - app: kube-prometheus-stack-prometheus - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus" - heritage: "Helm" -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: prometheus-kube-prometheus-prometheus -subjects: - - kind: ServiceAccount - name: prometheus-kube-prometheus-prometheus - namespace: vynil-monitor \ No newline at end of file diff --git a/monitor/prometheus/rbac.authorization.k8s.io_v1_ClusterRole_prometheus-kube-prometheus-prometheus.yaml b/monitor/prometheus/rbac.authorization.k8s.io_v1_ClusterRole_prometheus-kube-prometheus-prometheus.yaml deleted file mode 100644 index c01e748..0000000 --- a/monitor/prometheus/rbac.authorization.k8s.io_v1_ClusterRole_prometheus-kube-prometheus-prometheus.yaml +++ /dev/null @@ -1,33 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/clusterrole.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: prometheus-kube-prometheus-prometheus - labels: - app: kube-prometheus-stack-prometheus - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus" - heritage: "Helm" -rules: -# This permission are not in the kube-prometheus repo -# they're grabbed from https://github.com/prometheus/prometheus/blob/master/documentation/examples/rbac-setup.yml -- apiGroups: [""] - resources: - - nodes - - nodes/metrics - - services - - endpoints - - pods - verbs: ["get", "list", "watch"] -- apiGroups: - - "networking.k8s.io" - resources: - - ingresses - verbs: ["get", "list", "watch"] -- nonResourceURLs: ["/metrics", "/metrics/cadvisor"] - verbs: ["get"] \ No newline at end of file diff --git a/monitor/prometheus/ressources_no_ns.tf b/monitor/prometheus/ressources_no_ns.tf deleted file mode 100644 index 9fa58b7..0000000 --- a/monitor/prometheus/ressources_no_ns.tf +++ /dev/null @@ -1,45 +0,0 @@ - -# first loop through resources in ids_prio[0] -resource "kustomization_resource" "pre_no_ns" { - for_each = data.kustomization_overlay.data_no_ns.ids_prio[0] - - manifest = ( - contains(["_/Secret"], regex("(?P.*/.*)/.*/.*", each.value)["group_kind"]) - ? sensitive(data.kustomization_overlay.data_no_ns.manifests[each.value]) - : data.kustomization_overlay.data_no_ns.manifests[each.value] - ) -} - -# then loop through resources in ids_prio[1] -# and set an explicit depends_on on kustomization_resource.pre -# wait 2 minutes for any deployment or daemonset to become ready -resource "kustomization_resource" "main_no_ns" { - for_each = data.kustomization_overlay.data_no_ns.ids_prio[1] - - manifest = ( - contains(["_/Secret"], regex("(?P.*/.*)/.*/.*", each.value)["group_kind"]) - ? sensitive(data.kustomization_overlay.data_no_ns.manifests[each.value]) - : data.kustomization_overlay.data_no_ns.manifests[each.value] - ) - wait = true - timeouts { - create = "5m" - update = "5m" - } - - depends_on = [kustomization_resource.pre_no_ns] -} - -# finally, loop through resources in ids_prio[2] -# and set an explicit depends_on on kustomization_resource.main -resource "kustomization_resource" "post_no_ns" { - for_each = data.kustomization_overlay.data_no_ns.ids_prio[2] - - manifest = ( - contains(["_/Secret"], regex("(?P.*/.*)/.*/.*", each.value)["group_kind"]) - ? sensitive(data.kustomization_overlay.data_no_ns.manifests[each.value]) - : data.kustomization_overlay.data_no_ns.manifests[each.value] - ) - - depends_on = [kustomization_resource.main_no_ns] -} diff --git a/monitor/prometheus/svc.tf b/monitor/prometheus/svc.tf deleted file mode 100644 index 3b6bab6..0000000 --- a/monitor/prometheus/svc.tf +++ /dev/null @@ -1,33 +0,0 @@ -locals { - svc-label = merge(local.common-labels, { - "app" = "kube-prometheus-stack-prometheus" - "release" = "prometheus" - "self-monitor" = "true" - - }) -} -resource "kubectl_manifest" "svc" { - yaml_body = <<-EOF - apiVersion: v1 - kind: Service - metadata: - name: "${var.component}-${var.instance}" - namespace: "${var.namespace}" - labels: ${jsonencode(local.svc-label)} - spec: - ports: - - name: http-web - port: 9090 - targetPort: 9090 - - name: reloader-web - appProtocol: http - port: 8080 - targetPort: reloader-web - publishNotReadyAddresses: false - selector: - app.kubernetes.io/name: prometheus - operator.prometheus.io/name: ${kubectl_manifest.prometheus.name} - sessionAffinity: None - type: "ClusterIP" - EOF -} diff --git a/monitor/prometheus/v1_ConfigMap_prometheus-kube-prometheus-grafana-datasource.yaml b/monitor/prometheus/v1_ConfigMap_prometheus-kube-prometheus-grafana-datasource.yaml deleted file mode 100644 index e81431f..0000000 --- a/monitor/prometheus/v1_ConfigMap_prometheus-kube-prometheus-grafana-datasource.yaml +++ /dev/null @@ -1,38 +0,0 @@ -# Source: kube-prometheus-stack/templates/grafana/configmaps-datasources.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: prometheus-kube-prometheus-grafana-datasource - namespace: vynil-monitor - labels: - grafana_datasource: "1" - app: kube-prometheus-stack-grafana - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus" - heritage: "Helm" -data: - datasource.yaml: |- - apiVersion: 1 - datasources: - - name: Prometheus - type: prometheus - uid: prometheus - url: http://prometheus-kube-prometheus-prometheus.vynil-monitor:9090/ - access: proxy - isDefault: true - jsonData: - httpMethod: POST - timeInterval: 30s - - name: Alertmanager - type: alertmanager - uid: alertmanager - url: http://prometheus-kube-prometheus-alertmanager.vynil-monitor:9093/ - access: proxy - jsonData: - handleGrafanaManagedAlerts: false - implementation: prometheus \ No newline at end of file diff --git a/monitor/prometheus/v1_ConfigMap_prometheus-kube-prometheus-k8s-coredns.yaml b/monitor/prometheus/v1_ConfigMap_prometheus-kube-prometheus-k8s-coredns.yaml deleted file mode 100644 index d1d922d..0000000 --- a/monitor/prometheus/v1_ConfigMap_prometheus-kube-prometheus-k8s-coredns.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-coredns.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - namespace: vynil-monitor - name: prometheus-kube-prometheus-k8s-coredns - annotations: - {} - labels: - grafana_dashboard: "1" - app: kube-prometheus-stack-grafana - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus" - heritage: "Helm" -data: - k8s-coredns.json: |- - {"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"description":"A dashboard for the CoreDNS DNS server with updated metrics for version 1.7.0+. Based on the CoreDNS dashboard by buhay.","editable":true,"gnetId":12539,"graphTooltip":0,"iteration":1603798405693,"links":[{"icon":"external link","tags":[],"targetBlank":true,"title":"CoreDNS.io","type":"link","url":"https://coredns.io"}],"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","editable":true,"error":false,"fieldConfig":{"defaults":{"custom":{},"links":[],"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"fill":1,"fillGradient":0,"grid":{},"gridPos":{"h":7,"w":8,"x":0,"y":0},"hiddenSeries":false,"id":2,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","options":{"alertThreshold":true},"percentage":false,"pluginVersion":"7.2.0","pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"total","yaxis":2}],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(rate(coredns_dns_request_count_total{job=~\"$job\",instance=~\"$instance\"}[5m])) by (proto) or\nsum(rate(coredns_dns_requests_total{job=~\"$job\",instance=~\"$instance\"}[5m])) by (proto)","format":"time_series","interval":"","intervalFactor":2,"legendFormat":"{{ proto }}","refId":"A","step":60}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Requests (total)","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","logBase":1,"max":null,"min":0,"show":true},{"format":"pps","logBase":1,"max":null,"min":0,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","editable":true,"error":false,"fieldConfig":{"defaults":{"custom":{},"links":[]},"overrides":[]},"fill":1,"fillGradient":0,"grid":{},"gridPos":{"h":7,"w":8,"x":8,"y":0},"hiddenSeries":false,"id":4,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","options":{"alertThreshold":true},"percentage":false,"pluginVersion":"7.2.0","pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"total","yaxis":2},{"alias":"other","yaxis":2}],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(rate(coredns_dns_request_type_count_total{job=~\"$job\",instance=~\"$instance\"}[5m])) by (type) or \nsum(rate(coredns_dns_requests_total{job=~\"$job\",instance=~\"$instance\"}[5m])) by (type)","interval":"","intervalFactor":2,"legendFormat":"{{ type }}","refId":"A","step":60}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Requests (by qtype)","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","logBase":1,"max":null,"min":0,"show":true},{"format":"pps","logBase":1,"max":null,"min":0,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","editable":true,"error":false,"fieldConfig":{"defaults":{"custom":{},"links":[]},"overrides":[]},"fill":1,"fillGradient":0,"grid":{},"gridPos":{"h":7,"w":8,"x":16,"y":0},"hiddenSeries":false,"id":6,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","options":{"alertThreshold":true},"percentage":false,"pluginVersion":"7.2.0","pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"total","yaxis":2}],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(rate(coredns_dns_request_count_total{job=~\"$job\",instance=~\"$instance\"}[5m])) by (zone) or\nsum(rate(coredns_dns_requests_total{job=~\"$job\",instance=~\"$instance\"}[5m])) by (zone)","interval":"","intervalFactor":2,"legendFormat":"{{ zone }}","refId":"A","step":60}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Requests (by zone)","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","logBase":1,"max":null,"min":0,"show":true},{"format":"pps","logBase":1,"max":null,"min":0,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","editable":true,"error":false,"fieldConfig":{"defaults":{"custom":{},"links":[]},"overrides":[]},"fill":1,"fillGradient":0,"grid":{},"gridPos":{"h":7,"w":12,"x":0,"y":7},"hiddenSeries":false,"id":8,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","options":{"alertThreshold":true},"percentage":false,"pluginVersion":"7.2.0","pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"total","yaxis":2}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(coredns_dns_request_do_count_total{job=~\"$job\",instance=~\"$instance\"}[5m])) or\nsum(rate(coredns_dns_do_requests_total{job=~\"$job\",instance=~\"$instance\"}[5m]))","interval":"","intervalFactor":2,"legendFormat":"DO","refId":"A","step":40},{"expr":"sum(rate(coredns_dns_request_count_total{job=~\"$job\",instance=~\"$instance\"}[5m])) or\nsum(rate(coredns_dns_requests_total{job=~\"$job\",instance=~\"$instance\"}[5m]))","interval":"","intervalFactor":2,"legendFormat":"total","refId":"B","step":40}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Requests (DO bit)","tooltip":{"shared":true,"sort":2,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","logBase":1,"max":null,"min":0,"show":true},{"format":"pps","logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","editable":true,"error":false,"fieldConfig":{"defaults":{"custom":{},"links":[]},"overrides":[]},"fill":1,"fillGradient":0,"grid":{},"gridPos":{"h":7,"w":6,"x":12,"y":7},"hiddenSeries":false,"id":10,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","options":{"alertThreshold":true},"percentage":false,"pluginVersion":"7.2.0","pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"tcp:90","yaxis":2},{"alias":"tcp:99 ","yaxis":2},{"alias":"tcp:50","yaxis":2}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(coredns_dns_request_size_bytes_bucket{job=~\"$job\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto))","interval":"","intervalFactor":2,"legendFormat":"{{ proto }}:99 ","refId":"A","step":60},{"expr":"histogram_quantile(0.90, sum(rate(coredns_dns_request_size_bytes_bucket{job=~\"$job\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto))","intervalFactor":2,"legendFormat":"{{ proto }}:90","refId":"B","step":60},{"expr":"histogram_quantile(0.50, sum(rate(coredns_dns_request_size_bytes_bucket{job=~\"$job\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto))","intervalFactor":2,"legendFormat":"{{ proto }}:50","refId":"C","step":60}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Requests (size, udp)","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","logBase":1,"max":null,"min":0,"show":true},{"format":"short","logBase":1,"max":null,"min":0,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","editable":true,"error":false,"fieldConfig":{"defaults":{"custom":{},"links":[]},"overrides":[]},"fill":1,"fillGradient":0,"grid":{},"gridPos":{"h":7,"w":6,"x":18,"y":7},"hiddenSeries":false,"id":12,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","options":{"alertThreshold":true},"percentage":false,"pluginVersion":"7.2.0","pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"tcp:90","yaxis":1},{"alias":"tcp:99 ","yaxis":1},{"alias":"tcp:50","yaxis":1}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(coredns_dns_request_size_bytes_bucket{job=~\"$job\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto))","format":"time_series","interval":"","intervalFactor":2,"legendFormat":"{{ proto }}:99 ","refId":"A","step":60},{"expr":"histogram_quantile(0.90, sum(rate(coredns_dns_request_size_bytes_bucket{job=~\"$job\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto))","format":"time_series","interval":"","intervalFactor":2,"legendFormat":"{{ proto }}:90","refId":"B","step":60},{"expr":"histogram_quantile(0.50, sum(rate(coredns_dns_request_size_bytes_bucket{job=~\"$job\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto))","format":"time_series","interval":"","intervalFactor":2,"legendFormat":"{{ proto }}:50","refId":"C","step":60}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Requests (size,tcp)","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","logBase":1,"max":null,"min":0,"show":true},{"format":"short","logBase":1,"max":null,"min":0,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","editable":true,"error":false,"fieldConfig":{"defaults":{"custom":{},"links":[]},"overrides":[]},"fill":1,"fillGradient":0,"grid":{},"gridPos":{"h":7,"w":12,"x":0,"y":14},"hiddenSeries":false,"id":14,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","options":{"alertThreshold":true},"percentage":false,"pluginVersion":"7.2.0","pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(rate(coredns_dns_response_rcode_count_total{job=~\"$job\",instance=~\"$instance\"}[5m])) by (rcode) or\nsum(rate(coredns_dns_responses_total{job=~\"$job\",instance=~\"$instance\"}[5m])) by (rcode)","interval":"","intervalFactor":2,"legendFormat":"{{ rcode }}","refId":"A","step":40}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Responses (by rcode)","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","logBase":1,"max":null,"min":0,"show":true},{"format":"short","logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","editable":true,"error":false,"fieldConfig":{"defaults":{"custom":{},"links":[]},"overrides":[]},"fill":1,"fillGradient":0,"grid":{},"gridPos":{"h":7,"w":12,"x":12,"y":14},"hiddenSeries":false,"id":32,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","options":{"alertThreshold":true},"percentage":false,"pluginVersion":"7.2.0","pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(coredns_dns_request_duration_seconds_bucket{job=~\"$job\",instance=~\"$instance\"}[5m])) by (le, job))","format":"time_series","intervalFactor":2,"legendFormat":"99%","refId":"A","step":40},{"expr":"histogram_quantile(0.90, sum(rate(coredns_dns_request_duration_seconds_bucket{job=~\"$job\",instance=~\"$instance\"}[5m])) by (le))","format":"time_series","intervalFactor":2,"legendFormat":"90%","refId":"B","step":40},{"expr":"histogram_quantile(0.50, sum(rate(coredns_dns_request_duration_seconds_bucket{job=~\"$job\",instance=~\"$instance\"}[5m])) by (le))","format":"time_series","intervalFactor":2,"legendFormat":"50%","refId":"C","step":40}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Responses (duration)","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","logBase":1,"max":null,"min":0,"show":true},{"format":"short","logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","editable":true,"error":false,"fieldConfig":{"defaults":{"custom":{},"links":[]},"overrides":[]},"fill":1,"fillGradient":0,"grid":{},"gridPos":{"h":7,"w":12,"x":0,"y":21},"hiddenSeries":false,"id":18,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","options":{"alertThreshold":true},"percentage":false,"pluginVersion":"7.2.0","pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"udp:50%","yaxis":1},{"alias":"tcp:50%","yaxis":2},{"alias":"tcp:90%","yaxis":2},{"alias":"tcp:99%","yaxis":2}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(coredns_dns_response_size_bytes_bucket{job=~\"$job\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto)) ","interval":"","intervalFactor":2,"legendFormat":"{{ proto }}:99%","refId":"A","step":40},{"expr":"histogram_quantile(0.90, sum(rate(coredns_dns_response_size_bytes_bucket{job=~\"$job\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto)) ","interval":"","intervalFactor":2,"legendFormat":"{{ proto }}:90%","refId":"B","step":40},{"expr":"histogram_quantile(0.50, sum(rate(coredns_dns_response_size_bytes_bucket{job=~\"$job\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto)) ","hide":false,"intervalFactor":2,"legendFormat":"{{ proto }}:50%","metric":"","refId":"C","step":40}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Responses (size, udp)","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","logBase":1,"max":null,"min":0,"show":true},{"format":"short","logBase":1,"max":null,"min":0,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","editable":true,"error":false,"fieldConfig":{"defaults":{"custom":{},"links":[]},"overrides":[]},"fill":1,"fillGradient":0,"grid":{},"gridPos":{"h":7,"w":12,"x":12,"y":21},"hiddenSeries":false,"id":20,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","options":{"alertThreshold":true},"percentage":false,"pluginVersion":"7.2.0","pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"udp:50%","yaxis":1},{"alias":"tcp:50%","yaxis":1},{"alias":"tcp:90%","yaxis":1},{"alias":"tcp:99%","yaxis":1}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(coredns_dns_response_size_bytes_bucket{job=~\"$job\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto)) ","format":"time_series","intervalFactor":2,"legendFormat":"{{ proto }}:99%","refId":"A","step":40},{"expr":"histogram_quantile(0.90, sum(rate(coredns_dns_response_size_bytes_bucket{job=~\"$job\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto)) ","format":"time_series","intervalFactor":2,"legendFormat":"{{ proto }}:90%","refId":"B","step":40},{"expr":"histogram_quantile(0.50, sum(rate(coredns_dns_response_size_bytes_bucket{job=~\"$job\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le, proto)) ","format":"time_series","intervalFactor":2,"legendFormat":"{{ proto }}:50%","metric":"","refId":"C","step":40}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Responses (size, tcp)","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","logBase":1,"max":null,"min":0,"show":true},{"format":"short","logBase":1,"max":null,"min":0,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","editable":true,"error":false,"fieldConfig":{"defaults":{"custom":{},"links":[]},"overrides":[]},"fill":1,"fillGradient":0,"grid":{},"gridPos":{"h":7,"w":12,"x":0,"y":28},"hiddenSeries":false,"id":22,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","options":{"alertThreshold":true},"percentage":false,"pluginVersion":"7.2.0","pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(coredns_cache_size{job=~\"$job\",instance=~\"$instance\"}) by (type) or\nsum(coredns_cache_entries{job=~\"$job\",instance=~\"$instance\"}) by (type)","interval":"","intervalFactor":2,"legendFormat":"{{ type }}","refId":"A","step":40}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Cache (size)","tooltip":{"shared":true,"sort":2,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"decbytes","logBase":1,"max":null,"min":0,"show":true},{"format":"short","logBase":1,"max":null,"min":0,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","editable":true,"error":false,"fieldConfig":{"defaults":{"custom":{},"links":[]},"overrides":[]},"fill":1,"fillGradient":0,"grid":{},"gridPos":{"h":7,"w":12,"x":12,"y":28},"hiddenSeries":false,"id":24,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","options":{"alertThreshold":true},"percentage":false,"pluginVersion":"7.2.0","pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"misses","yaxis":2}],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(rate(coredns_cache_hits_total{job=~\"$job\",instance=~\"$instance\"}[5m])) by (type)","hide":false,"intervalFactor":2,"legendFormat":"hits:{{ type }}","refId":"A","step":40},{"expr":"sum(rate(coredns_cache_misses_total{job=~\"$job\",instance=~\"$instance\"}[5m])) by (type)","hide":false,"intervalFactor":2,"legendFormat":"misses","refId":"B","step":40}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Cache (hitrate)","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","logBase":1,"max":null,"min":0,"show":true},{"format":"pps","logBase":1,"max":null,"min":0,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"refresh":"10s","schemaVersion":26,"style":"dark","tags":["dns","coredns"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"includeAll":false,"label":null,"multi":false,"name":"datasource","options":[],"query":"prometheus","queryValue":"","refresh":1,"regex":"","skipUrlSync":false,"type":"datasource"},{"allValue":".*","current":{"selected":false,"text":"coredns","value":"coredns"},"datasource":{"type":"prometheus","uid":"${datasource}"},"definition":"label_values(coredns_dns_requests_total, job)","hide":0,"includeAll":true,"label":"Job","multi":false,"name":"job","options":[],"query":{"query":"label_values(coredns_dns_requests_total, job)","refId":"StandardVariableQuery"},"refresh":1,"regex":"","skipUrlSync":false,"sort":1,"type":"query"},{"allValue":".*","current":{"selected":true,"text":"All","value":"$__all"},"datasource":"$datasource","definition":"label_values(coredns_dns_requests_total{job=~\"$job\"}, instance)","hide":0,"includeAll":true,"label":"Instance","multi":false,"name":"instance","options":[],"query":"label_values(coredns_dns_requests_total{job=~\"$job\"}, instance)","refresh":1,"regex":"","skipUrlSync":false,"sort":3,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-3h","to":"now"},"timepicker":{"refresh_intervals":["10s","30s","1m","5m","15m","30m","1h","2h","1d"]},"timezone":"Europe/Paris","title":"CoreDNS","uid":"vkQ0UHxik","version":3} \ No newline at end of file diff --git a/monitor/prometheus/v1_ConfigMap_prometheus-kube-prometheus-kubelet.yaml b/monitor/prometheus/v1_ConfigMap_prometheus-kube-prometheus-kubelet.yaml deleted file mode 100644 index 0c212c7..0000000 --- a/monitor/prometheus/v1_ConfigMap_prometheus-kube-prometheus-kubelet.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/kubelet.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - namespace: vynil-monitor - name: prometheus-kube-prometheus-kubelet - annotations: - {} - labels: - grafana_dashboard: "1" - app: kube-prometheus-stack-grafana - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus" - heritage: "Helm" -data: - kubelet.json: |- - {"__inputs":[],"__requires":[],"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"id":null,"links":[],"panels":[{"datasource":"$datasource","fieldConfig":{"defaults":{"links":[],"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"none"}},"gridPos":{"h":7,"w":4,"x":0,"y":0},"id":2,"links":[],"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7","targets":[{"expr":"sum(kubelet_node_name{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"})","format":"time_series","intervalFactor":2,"legendFormat":"","refId":"A"}],"title":"Running Kubelets","transparent":false,"type":"stat"},{"datasource":"$datasource","fieldConfig":{"defaults":{"links":[],"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"none"}},"gridPos":{"h":7,"w":4,"x":4,"y":0},"id":3,"links":[],"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7","targets":[{"expr":"sum(kubelet_running_pods{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"title":"Running Pods","transparent":false,"type":"stat"},{"datasource":"$datasource","fieldConfig":{"defaults":{"links":[],"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"none"}},"gridPos":{"h":7,"w":4,"x":8,"y":0},"id":4,"links":[],"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7","targets":[{"expr":"sum(kubelet_running_containers{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"title":"Running Containers","transparent":false,"type":"stat"},{"datasource":"$datasource","fieldConfig":{"defaults":{"links":[],"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"none"}},"gridPos":{"h":7,"w":4,"x":12,"y":0},"id":5,"links":[],"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7","targets":[{"expr":"sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\", state=\"actual_state_of_world\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"title":"Actual Volume Count","transparent":false,"type":"stat"},{"datasource":"$datasource","fieldConfig":{"defaults":{"links":[],"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"none"}},"gridPos":{"h":7,"w":4,"x":16,"y":0},"id":6,"links":[],"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7","targets":[{"expr":"sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",state=\"desired_state_of_world\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"title":"Desired Volume Count","transparent":false,"type":"stat"},{"datasource":"$datasource","fieldConfig":{"defaults":{"links":[],"mappings":[],"thresholds":{"mode":"absolute","steps":[]},"unit":"none"}},"gridPos":{"h":7,"w":4,"x":20,"y":0},"id":7,"links":[],"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"7","targets":[{"expr":"sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"title":"Config Error Count","transparent":false,"type":"stat"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":12,"x":0,"y":7},"id":8,"legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (operation_type, instance)","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} {{operation_type}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Operation Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":12,"x":12,"y":7},"id":9,"legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type)","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} {{operation_type}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Operation Error Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":24,"x":0,"y":14},"id":10,"legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type, le))","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} {{operation_type}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Operation duration 99th quantile","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":12,"x":0,"y":21},"id":11,"legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance)","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} pod","refId":"A"},{"expr":"sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance)","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} worker","refId":"B"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Pod Start Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":12,"x":12,"y":21},"id":12,"legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} pod","refId":"A"},{"expr":"histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} worker","refId":"B"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Pod Start Duration","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":12,"x":0,"y":28},"id":13,"legend":{"alignAsTable":true,"avg":false,"current":true,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin)","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} {{operation_name}} {{volume_plugin}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Storage Operation Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":12,"x":12,"y":28},"id":14,"legend":{"alignAsTable":true,"avg":false,"current":true,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin)","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} {{operation_name}} {{volume_plugin}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Storage Operation Error Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":24,"x":0,"y":35},"id":15,"legend":{"alignAsTable":true,"avg":false,"current":true,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin, le))","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} {{operation_name}} {{volume_plugin}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Storage Operation Duration 99th quantile","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":12,"x":0,"y":42},"id":16,"legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type)","format":"time_series","intervalFactor":2,"legendFormat":"{{operation_type}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Cgroup manager operation rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":12,"x":12,"y":42},"id":17,"legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type, le))","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} {{operation_type}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Cgroup manager 99th quantile","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","description":"Pod lifecycle event generator","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":12,"x":0,"y":49},"id":18,"legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance)","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"PLEG relist rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":12,"x":12,"y":49},"id":19,"legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"PLEG relist interval","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":24,"x":0,"y":56},"id":20,"legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"PLEG relist duration","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":24,"x":0,"y":63},"id":21,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"2xx","refId":"A"},{"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"3xx","refId":"B"},{"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"4xx","refId":"C"},{"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"5xx","refId":"D"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"RPC Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":24,"x":0,"y":70},"id":22,"legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance, verb, url, le))","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} {{verb}} {{url}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Request duration 99th quantile","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":8,"x":0,"y":77},"id":23,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"process_resident_memory_bytes{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":8,"x":8,"y":77},"id":24,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU usage","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":8,"x":16,"y":77},"id":25,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"go_goroutines{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Goroutines","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}]}],"refresh":"10s","rows":[],"schemaVersion":14,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{},"datasource":"$datasource","hide":2,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(up{job=\"kubelet\", metrics_path=\"/metrics\"}, cluster)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{},"datasource":"$datasource","hide":0,"includeAll":true,"label":"instance","multi":false,"name":"instance","options":[],"query":"label_values(up{job=\"kubelet\", metrics_path=\"/metrics\",cluster=\"$cluster\"}, instance)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Kubernetes / Kubelet","uid":"3138fa155d5915769fbded898ac09fd9","version":0} \ No newline at end of file diff --git a/monitor/prometheus/v1_ConfigMap_prometheus-kube-prometheus-prometheus.yaml b/monitor/prometheus/v1_ConfigMap_prometheus-kube-prometheus-prometheus.yaml deleted file mode 100644 index 596c620..0000000 --- a/monitor/prometheus/v1_ConfigMap_prometheus-kube-prometheus-prometheus.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/prometheus.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - namespace: vynil-monitor - name: prometheus-kube-prometheus-prometheus - annotations: - {} - labels: - grafana_dashboard: "1" - app: kube-prometheus-stack-grafana - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus" - heritage: "Helm" -data: - prometheus.json: |- - {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[],"refresh":"60s","rows":[{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":1,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"Count","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"hidden","unit":"short"},{"alias":"Uptime","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"s"},{"alias":"Cluster","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"cluster","thresholds":[],"type":"number","unit":"short"},{"alias":"Instance","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"instance","thresholds":[],"type":"number","unit":"short"},{"alias":"Job","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"job","thresholds":[],"type":"number","unit":"short"},{"alias":"Version","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"version","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"count by (cluster, job, instance, version) (prometheus_build_info{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"})","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"max by (cluster, job, instance) (time() - process_start_time_seconds{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"})","format":"table","instant":true,"legendFormat":"","refId":"B"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Prometheus Stats","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Prometheus Stats","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":2,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(prometheus_target_sync_length_seconds_sum{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}[5m])) by (cluster, job, scrape_job, instance) * 1e3","format":"time_series","legendFormat":"{{cluster}}:{{job}}:{{instance}}:{{scrape_job}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Target Sync","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ms","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":3,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by (cluster, job, instance) (prometheus_sd_discovered_targets{cluster=~\"$cluster\", job=~\"$job\",instance=~\"$instance\"})","format":"time_series","legendFormat":"{{cluster}}:{{job}}:{{instance}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Targets","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Discovery","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":4,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"rate(prometheus_target_interval_length_seconds_sum{cluster=~\"$cluster\", job=~\"$job\",instance=~\"$instance\"}[5m]) / rate(prometheus_target_interval_length_seconds_count{cluster=~\"$cluster\", job=~\"$job\",instance=~\"$instance\"}[5m]) * 1e3","format":"time_series","legendFormat":"{{cluster}}:{{job}}:{{instance}} {{interval}} configured","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Average Scrape Interval Duration","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ms","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":5,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by (cluster, job, instance) (rate(prometheus_target_scrapes_exceeded_body_size_limit_total{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}[1m]))","format":"time_series","legendFormat":"exceeded body size limit: {{cluster}} {{job}} {{instance}}","legendLink":null},{"expr":"sum by (cluster, job, instance) (rate(prometheus_target_scrapes_exceeded_sample_limit_total{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}[1m]))","format":"time_series","legendFormat":"exceeded sample limit: {{cluster}} {{job}} {{instance}}","legendLink":null},{"expr":"sum by (cluster, job, instance) (rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}[1m]))","format":"time_series","legendFormat":"duplicate timestamp: {{cluster}} {{job}} {{instance}}","legendLink":null},{"expr":"sum by (cluster, job, instance) (rate(prometheus_target_scrapes_sample_out_of_bounds_total{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}[1m]))","format":"time_series","legendFormat":"out of bounds: {{cluster}} {{job}} {{instance}}","legendLink":null},{"expr":"sum by (cluster, job, instance) (rate(prometheus_target_scrapes_sample_out_of_order_total{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}[1m]))","format":"time_series","legendFormat":"out of order: {{cluster}} {{job}} {{instance}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Scrape failures","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":6,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":true,"steppedLine":false,"targets":[{"expr":"rate(prometheus_tsdb_head_samples_appended_total{cluster=~\"$cluster\", job=~\"$job\",instance=~\"$instance\"}[5m])","format":"time_series","legendFormat":"{{cluster}} {{job}} {{instance}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Appended Samples","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Retrieval","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":7,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"prometheus_tsdb_head_series{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}","format":"time_series","legendFormat":"{{cluster}} {{job}} {{instance}} head series","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Head Series","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":8,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"prometheus_tsdb_head_chunks{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}","format":"time_series","legendFormat":"{{cluster}} {{job}} {{instance}} head chunks","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Head Chunks","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Storage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":9,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"rate(prometheus_engine_query_duration_seconds_count{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\",slice=\"inner_eval\"}[5m])","format":"time_series","legendFormat":"{{cluster}} {{job}} {{instance}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Query Rate","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":10,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"max by (slice) (prometheus_engine_query_duration_seconds{quantile=\"0.9\",cluster=~\"$cluster\", job=~\"$job\",instance=~\"$instance\"}) * 1e3","format":"time_series","legendFormat":"{{slice}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Stage Duration","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ms","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Query","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["prometheus-mixin"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":".+","current":{"selected":true,"text":"All","value":"$__all"},"datasource":"$datasource","hide":2,"includeAll":true,"label":"cluster","multi":true,"name":"cluster","options":[],"query":"label_values(prometheus_build_info{job=\"prometheus-k8s\",namespace=\"monitoring\"}, cluster)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":".+","current":{"selected":true,"text":"All","value":"$__all"},"datasource":"$datasource","hide":0,"includeAll":true,"label":"job","multi":true,"name":"job","options":[],"query":"label_values(prometheus_build_info{cluster=~\"$cluster\"}, job)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":".+","current":{"selected":true,"text":"All","value":"$__all"},"datasource":"$datasource","hide":0,"includeAll":true,"label":"instance","multi":true,"name":"instance","options":[],"query":"label_values(prometheus_build_info{cluster=~\"$cluster\", job=~\"$job\"}, instance)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Prometheus / Overview","uid":"","version":0} \ No newline at end of file diff --git a/monitor/prometheus/v1_ConfigMap_prometheus-kube-prometheus-proxy.yaml b/monitor/prometheus/v1_ConfigMap_prometheus-kube-prometheus-proxy.yaml deleted file mode 100644 index 67b68e9..0000000 --- a/monitor/prometheus/v1_ConfigMap_prometheus-kube-prometheus-proxy.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/proxy.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - namespace: vynil-monitor - name: prometheus-kube-prometheus-proxy - annotations: - {} - labels: - grafana_dashboard: "1" - app: kube-prometheus-stack-grafana - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus" - heritage: "Helm" -data: - proxy.json: |- - {"__inputs":[],"__requires":[],"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"id":null,"links":[],"refresh":"10s","rows":[{"collapse":false,"collapsed":false,"panels":[{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"$datasource","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{},"id":2,"interval":"1m","legend":{"alignAsTable":true,"rightSide":true},"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"span":2,"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum(up{cluster=\"$cluster\", job=\"kube-proxy\"})","format":"time_series","intervalFactor":2,"legendFormat":"","refId":"A"}],"thresholds":"","title":"Up","tooltip":{"shared":false},"type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"min"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":3,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":5,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"rate","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rules Sync Rate","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"ops","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":4,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":5,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99,rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Rule Sync Latency 99th Quantile","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"s","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Dashboard Row","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":5,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(kubeproxy_network_programming_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"rate","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Network Programming Rate","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"ops","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":6,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval])) by (instance, le))","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Network Programming Latency 99th Quantile","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"s","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Dashboard Row","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":7,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"2xx","refId":"A"},{"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"3xx","refId":"B"},{"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"4xx","refId":"C"},{"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"5xx","refId":"D"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Kube API Request Rate","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":8,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":8,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\",verb=\"POST\"}[$__rate_interval])) by (verb, url, le))","format":"time_series","intervalFactor":2,"legendFormat":"{{verb}} {{url}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Post Request Latency 99th Quantile","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"s","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Dashboard Row","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":9,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))","format":"time_series","intervalFactor":2,"legendFormat":"{{verb}} {{url}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Get Request Latency 99th Quantile","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"s","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Dashboard Row","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":10,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"process_resident_memory_bytes{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":11,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}[$__rate_interval])","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU usage","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{},"id":12,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"go_goroutines{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Goroutines","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Dashboard Row","titleSize":"h6","type":"row"}],"schemaVersion":14,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{},"datasource":"$datasource","hide":2,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(up{job=\"kube-proxy\"}, cluster)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{},"datasource":"$datasource","hide":0,"includeAll":true,"label":null,"multi":false,"name":"instance","options":[],"query":"label_values(up{job=\"kube-proxy\", cluster=\"$cluster\", job=\"kube-proxy\"}, instance)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "Europe/Paris","title":"Kubernetes / Proxy","uid":"632e265de029684c40b21cb76bca4f94","version":0} \ No newline at end of file diff --git a/monitor/prometheus/v1_Secret_prometheus-kube-prometheus-prometheus.yaml b/monitor/prometheus/v1_Secret_prometheus-kube-prometheus-prometheus.yaml deleted file mode 100644 index a669a1f..0000000 --- a/monitor/prometheus/v1_Secret_prometheus-kube-prometheus-prometheus.yaml +++ /dev/null @@ -1,18 +0,0 @@ -# Source: kube-prometheus-stack/templates/prometheus/secret.yaml -apiVersion: v1 -kind: Secret -metadata: - name: prometheus-kube-prometheus-prometheus - namespace: vynil-monitor - labels: - app: kube-prometheus-stack-prometheus - app.kubernetes.io/component: prometheus - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus" - heritage: "Helm" -data: \ No newline at end of file diff --git a/monitor/prometheus/v1_ServiceAccount_prometheus-kube-prometheus-prometheus.yaml b/monitor/prometheus/v1_ServiceAccount_prometheus-kube-prometheus-prometheus.yaml deleted file mode 100644 index 326990d..0000000 --- a/monitor/prometheus/v1_ServiceAccount_prometheus-kube-prometheus-prometheus.yaml +++ /dev/null @@ -1,19 +0,0 @@ ---- -# Source: kube-prometheus-stack/templates/prometheus/serviceaccount.yaml -apiVersion: v1 -kind: ServiceAccount -metadata: - name: prometheus-kube-prometheus-prometheus - namespace: vynil-monitor - labels: - app: kube-prometheus-stack-prometheus - app.kubernetes.io/name: kube-prometheus-stack-prometheus - app.kubernetes.io/component: prometheus - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus" - heritage: "Helm" \ No newline at end of file diff --git a/monitor/prometheus/v1_Service_prometheus-kube-prometheus-coredns.yaml b/monitor/prometheus/v1_Service_prometheus-kube-prometheus-coredns.yaml deleted file mode 100644 index 9b3c38e..0000000 --- a/monitor/prometheus/v1_Service_prometheus-kube-prometheus-coredns.yaml +++ /dev/null @@ -1,26 +0,0 @@ -# Source: kube-prometheus-stack/templates/exporters/core-dns/service.yaml -apiVersion: v1 -kind: Service -metadata: - name: prometheus-kube-prometheus-coredns - labels: - app: kube-prometheus-stack-coredns - jobLabel: coredns - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus" - heritage: "Helm" - namespace: kube-system -spec: - clusterIP: None - ports: - - name: http-metrics - port: 9153 - protocol: TCP - targetPort: 9153 - selector: - k8s-app: kube-dns \ No newline at end of file diff --git a/monitor/prometheus/v1_Service_prometheus-kube-prometheus-kube-proxy.yaml b/monitor/prometheus/v1_Service_prometheus-kube-prometheus-kube-proxy.yaml deleted file mode 100644 index 7c1540b..0000000 --- a/monitor/prometheus/v1_Service_prometheus-kube-prometheus-kube-proxy.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# Source: kube-prometheus-stack/templates/exporters/kube-proxy/service.yaml -apiVersion: v1 -kind: Service -metadata: - name: prometheus-kube-prometheus-kube-proxy - labels: - app: kube-prometheus-stack-kube-proxy - jobLabel: kube-proxy - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus" - heritage: "Helm" - namespace: kube-system -spec: - clusterIP: None - ports: - - name: http-metrics - port: 10249 - protocol: TCP - targetPort: 10249 - selector: - k8s-app: kube-proxy - type: ClusterIP \ No newline at end of file diff --git a/monitor/promtail/apps_v1_DaemonSet_promtail.yaml b/monitor/promtail/apps_v1_DaemonSet_promtail.yaml deleted file mode 100644 index 45085c7..0000000 --- a/monitor/promtail/apps_v1_DaemonSet_promtail.yaml +++ /dev/null @@ -1,95 +0,0 @@ -# Source: promtail/templates/daemonset.yaml -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: promtail - namespace: vynil-monitor - labels: - helm.sh/chart: promtail-6.15.4 - app.kubernetes.io/name: promtail - app.kubernetes.io/instance: promtail - app.kubernetes.io/version: "2.9.3" - app.kubernetes.io/managed-by: Helm - annotations: - configmap.reloader.stakater.com/reload: promtail -spec: - selector: - matchLabels: - app.kubernetes.io/name: promtail - app.kubernetes.io/instance: promtail - updateStrategy: - {} - template: - metadata: - labels: - app.kubernetes.io/name: promtail - app.kubernetes.io/instance: promtail - annotations: - checksum/config: 00064144e25aa9a0c6068578ba972e5af7d36ef6c203575fa021e9986a3194c3 - spec: - serviceAccountName: promtail - enableServiceLinks: true - securityContext: - runAsGroup: 0 - runAsUser: 0 - containers: - - name: promtail - image: "docker.io/grafana/promtail:2.9.3" - imagePullPolicy: IfNotPresent - args: - - "-config.file=/etc/promtail/promtail.yaml" - volumeMounts: - - name: config - mountPath: /etc/promtail - - mountPath: /run/promtail - name: run - - mountPath: /var/lib/docker/containers - name: containers - readOnly: true - - mountPath: /var/log/pods - name: pods - readOnly: true - env: - - name: HOSTNAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - ports: - - name: http-metrics - containerPort: 3101 - protocol: TCP - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - readinessProbe: - failureThreshold: 5 - httpGet: - path: '/ready' - port: http-metrics - initialDelaySeconds: 10 - periodSeconds: 10 - successThreshold: 1 - timeoutSeconds: 1 - tolerations: - - effect: NoSchedule - key: node-role.kubernetes.io/master - operator: Exists - - effect: NoSchedule - key: node-role.kubernetes.io/control-plane - operator: Exists - volumes: - - name: config - configMap: - name: promtail - - hostPath: - path: /run/promtail - name: run - - hostPath: - path: /var/lib/docker/containers - name: containers - - hostPath: - path: /var/log/pods - name: pods \ No newline at end of file diff --git a/monitor/promtail/config.tf b/monitor/promtail/config.tf deleted file mode 100644 index 0cbf1a6..0000000 --- a/monitor/promtail/config.tf +++ /dev/null @@ -1,94 +0,0 @@ -resource "kubectl_manifest" "config" { - yaml_body = <<-EOF - apiVersion: v1 - kind: ConfigMap - metadata: - name: promtail - namespace: "${var.namespace}" - labels: ${jsonencode(local.common-labels)} - data: - promtail.yaml: | - server: - log_level: ${var.log_level} - log_format: ${var.log_format} - http_listen_port: 3101 - clients: - - url: http://${var.loki-url}:3100/loki/api/v1/push - positions: - filename: /run/promtail/positions.yaml - scrape_configs: - - job_name: kubernetes-pods - pipeline_stages: - - cri: {} - kubernetes_sd_configs: - - role: pod - relabel_configs: - - source_labels: - - __meta_kubernetes_pod_controller_name - regex: ([0-9a-z-.]+?)(-[0-9a-f]{8,10})? - action: replace - target_label: __tmp_controller_name - - source_labels: - - __meta_kubernetes_pod_label_app_kubernetes_io_name - - __meta_kubernetes_pod_label_app - - __tmp_controller_name - - __meta_kubernetes_pod_name - regex: ^;*([^;]+)(;.*)?$ - action: replace - target_label: app - - source_labels: - - __meta_kubernetes_pod_label_app_kubernetes_io_instance - - __meta_kubernetes_pod_label_instance - regex: ^;*([^;]+)(;.*)?$ - action: replace - target_label: instance - - source_labels: - - __meta_kubernetes_pod_label_app_kubernetes_io_component - - __meta_kubernetes_pod_label_component - regex: ^;*([^;]+)(;.*)?$ - action: replace - target_label: component - - action: replace - source_labels: - - __meta_kubernetes_pod_node_name - target_label: node_name - - action: replace - source_labels: - - __meta_kubernetes_namespace - target_label: namespace - - action: replace - replacement: $1 - separator: / - source_labels: - - namespace - - app - target_label: job - - action: replace - source_labels: - - __meta_kubernetes_pod_name - target_label: pod - - action: replace - source_labels: - - __meta_kubernetes_pod_container_name - target_label: container - - action: replace - replacement: /var/log/pods/*$1/*.log - separator: / - source_labels: - - __meta_kubernetes_pod_uid - - __meta_kubernetes_pod_container_name - target_label: __path__ - - action: replace - regex: true/(.*) - replacement: /var/log/pods/*$1/*.log - separator: / - source_labels: - - __meta_kubernetes_pod_annotationpresent_kubernetes_io_config_hash - - __meta_kubernetes_pod_annotation_kubernetes_io_config_hash - - __meta_kubernetes_pod_container_name - target_label: __path__ - limits_config: - tracing: - enabled: false - EOF -} diff --git a/monitor/promtail/datas.tf b/monitor/promtail/datas.tf deleted file mode 100644 index 977a334..0000000 --- a/monitor/promtail/datas.tf +++ /dev/null @@ -1,38 +0,0 @@ -locals { - common-labels = { - "vynil.solidite.fr/owner-name" = var.instance - "vynil.solidite.fr/owner-namespace" = var.namespace - "vynil.solidite.fr/owner-category" = var.category - "vynil.solidite.fr/owner-component" = var.component - "app.kubernetes.io/managed-by" = "vynil" - "app.kubernetes.io/instance" = var.instance - } - rb-patch = <<-EOF - - op: replace - path: /subjects/0/namespace - value: "${var.namespace}" - EOF -} - -data "kustomization_overlay" "data" { - common_labels = local.common-labels - namespace = var.namespace - resources = [for file in fileset(path.module, "*.yaml"): file if file != "index.yaml" && length(regexall("ClusterRole",file))<1] - images { - name = "docker.io/grafana/promtail" - new_name = "${var.images.promtail.registry}/${var.images.promtail.repository}" - new_tag = "${var.images.promtail.tag}" - } -} -data "kustomization_overlay" "data_no_ns" { - common_labels = local.common-labels - resources = [for file in fileset(path.module, "*.yaml"): file if length(regexall("ClusterRole",file))>0] - - patches { - target { - kind = "ClusterRoleBinding" - name = "promtail" - } - patch = local.rb-patch - } -} diff --git a/monitor/promtail/index.yaml b/monitor/promtail/index.yaml deleted file mode 100644 index 5ec1636..0000000 --- a/monitor/promtail/index.yaml +++ /dev/null @@ -1,72 +0,0 @@ ---- -apiVersion: vinyl.solidite.fr/v1beta1 -kind: Component -category: monitor -metadata: - name: promtail - description: null -options: - images: - default: - promtail: - pullPolicy: IfNotPresent - registry: docker.io - repository: grafana/promtail - tag: 2.9.2 - examples: - - promtail: - pullPolicy: IfNotPresent - registry: docker.io - repository: grafana/promtail - tag: 2.9.2 - properties: - promtail: - default: - pullPolicy: IfNotPresent - registry: docker.io - repository: grafana/promtail - tag: 2.9.2 - properties: - pullPolicy: - default: IfNotPresent - enum: - - Always - - Never - - IfNotPresent - type: string - registry: - default: docker.io - type: string - repository: - default: grafana/promtail - type: string - tag: - default: 2.9.2 - type: string - type: object - type: object - log_level: - default: info - examples: - - info - type: string - log_format: - default: logfmt - examples: - - logfmt - type: string - loki-url: - default: loki - examples: - - loki - type: string -dependencies: [] -providers: - kubernetes: true - authentik: null - kubectl: true - postgresql: null - restapi: null - http: null - gitea: null -tfaddtype: null diff --git a/monitor/promtail/monitoring.coreos.com_v1_ServiceMonitor_promtail.yaml b/monitor/promtail/monitoring.coreos.com_v1_ServiceMonitor_promtail.yaml deleted file mode 100644 index cd83030..0000000 --- a/monitor/promtail/monitoring.coreos.com_v1_ServiceMonitor_promtail.yaml +++ /dev/null @@ -1,19 +0,0 @@ -# Source: promtail/templates/servicemonitor.yaml -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: promtail - labels: - helm.sh/chart: promtail-6.15.4 - app.kubernetes.io/name: promtail - app.kubernetes.io/instance: promtail - app.kubernetes.io/version: "2.9.3" - app.kubernetes.io/managed-by: Helm -spec: - selector: - matchLabels: - app.kubernetes.io/name: promtail - app.kubernetes.io/instance: promtail - endpoints: - - port: http-metrics - scheme: http \ No newline at end of file diff --git a/monitor/promtail/rbac.authorization.k8s.io_v1_ClusterRoleBinding_promtail.yaml b/monitor/promtail/rbac.authorization.k8s.io_v1_ClusterRoleBinding_promtail.yaml deleted file mode 100644 index 98ecc74..0000000 --- a/monitor/promtail/rbac.authorization.k8s.io_v1_ClusterRoleBinding_promtail.yaml +++ /dev/null @@ -1,19 +0,0 @@ -# Source: promtail/templates/clusterrolebinding.yaml -kind: ClusterRoleBinding -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: promtail - labels: - helm.sh/chart: promtail-6.15.4 - app.kubernetes.io/name: promtail - app.kubernetes.io/instance: promtail - app.kubernetes.io/version: "2.9.3" - app.kubernetes.io/managed-by: Helm -subjects: - - kind: ServiceAccount - name: promtail - namespace: vynil-monitor -roleRef: - kind: ClusterRole - name: promtail - apiGroup: rbac.authorization.k8s.io \ No newline at end of file diff --git a/monitor/promtail/rbac.authorization.k8s.io_v1_ClusterRole_promtail.yaml b/monitor/promtail/rbac.authorization.k8s.io_v1_ClusterRole_promtail.yaml deleted file mode 100644 index 5870614..0000000 --- a/monitor/promtail/rbac.authorization.k8s.io_v1_ClusterRole_promtail.yaml +++ /dev/null @@ -1,24 +0,0 @@ -# Source: promtail/templates/clusterrole.yaml -kind: ClusterRole -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: promtail - labels: - helm.sh/chart: promtail-6.15.4 - app.kubernetes.io/name: promtail - app.kubernetes.io/instance: promtail - app.kubernetes.io/version: "2.9.3" - app.kubernetes.io/managed-by: Helm -rules: - - apiGroups: - - "" - resources: - - nodes - - nodes/proxy - - services - - endpoints - - pods - verbs: - - get - - watch - - list \ No newline at end of file diff --git a/monitor/promtail/ressources_no_ns.tf b/monitor/promtail/ressources_no_ns.tf deleted file mode 100644 index 9fa58b7..0000000 --- a/monitor/promtail/ressources_no_ns.tf +++ /dev/null @@ -1,45 +0,0 @@ - -# first loop through resources in ids_prio[0] -resource "kustomization_resource" "pre_no_ns" { - for_each = data.kustomization_overlay.data_no_ns.ids_prio[0] - - manifest = ( - contains(["_/Secret"], regex("(?P.*/.*)/.*/.*", each.value)["group_kind"]) - ? sensitive(data.kustomization_overlay.data_no_ns.manifests[each.value]) - : data.kustomization_overlay.data_no_ns.manifests[each.value] - ) -} - -# then loop through resources in ids_prio[1] -# and set an explicit depends_on on kustomization_resource.pre -# wait 2 minutes for any deployment or daemonset to become ready -resource "kustomization_resource" "main_no_ns" { - for_each = data.kustomization_overlay.data_no_ns.ids_prio[1] - - manifest = ( - contains(["_/Secret"], regex("(?P.*/.*)/.*/.*", each.value)["group_kind"]) - ? sensitive(data.kustomization_overlay.data_no_ns.manifests[each.value]) - : data.kustomization_overlay.data_no_ns.manifests[each.value] - ) - wait = true - timeouts { - create = "5m" - update = "5m" - } - - depends_on = [kustomization_resource.pre_no_ns] -} - -# finally, loop through resources in ids_prio[2] -# and set an explicit depends_on on kustomization_resource.main -resource "kustomization_resource" "post_no_ns" { - for_each = data.kustomization_overlay.data_no_ns.ids_prio[2] - - manifest = ( - contains(["_/Secret"], regex("(?P.*/.*)/.*/.*", each.value)["group_kind"]) - ? sensitive(data.kustomization_overlay.data_no_ns.manifests[each.value]) - : data.kustomization_overlay.data_no_ns.manifests[each.value] - ) - - depends_on = [kustomization_resource.main_no_ns] -} diff --git a/monitor/promtail/v1_ServiceAccount_promtail.yaml b/monitor/promtail/v1_ServiceAccount_promtail.yaml deleted file mode 100644 index 0adb061..0000000 --- a/monitor/promtail/v1_ServiceAccount_promtail.yaml +++ /dev/null @@ -1,13 +0,0 @@ ---- -# Source: promtail/templates/serviceaccount.yaml -apiVersion: v1 -kind: ServiceAccount -metadata: - name: promtail - namespace: vynil-monitor - labels: - helm.sh/chart: promtail-6.15.4 - app.kubernetes.io/name: promtail - app.kubernetes.io/instance: promtail - app.kubernetes.io/version: "2.9.3" - app.kubernetes.io/managed-by: Helm \ No newline at end of file diff --git a/monitor/promtail/v1_Service_promtail-metrics.yaml b/monitor/promtail/v1_Service_promtail-metrics.yaml deleted file mode 100644 index e591249..0000000 --- a/monitor/promtail/v1_Service_promtail-metrics.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Source: promtail/templates/service-metrics.yaml -apiVersion: v1 -kind: Service -metadata: - name: promtail-metrics - namespace: vynil-monitor - labels: - helm.sh/chart: promtail-6.15.4 - app.kubernetes.io/name: promtail - app.kubernetes.io/instance: promtail - app.kubernetes.io/version: "2.9.3" - app.kubernetes.io/managed-by: Helm -spec: - clusterIP: None - ports: - - name: http-metrics - port: 3101 - targetPort: http-metrics - protocol: TCP - selector: - app.kubernetes.io/name: promtail - app.kubernetes.io/instance: promtail \ No newline at end of file diff --git a/monitor/thanos-ruler/index.yaml b/monitor/thanos-ruler/index.yaml deleted file mode 100644 index 2570fad..0000000 --- a/monitor/thanos-ruler/index.yaml +++ /dev/null @@ -1,82 +0,0 @@ ---- -apiVersion: vinyl.solidite.fr/v1beta1 -kind: Component -category: monitor -metadata: - name: thanos-ruler - description: null -options: - images: - default: - operator: - pullPolicy: IfNotPresent - registry: docker.io - repository: to-be/defined - tag: v1.0.0 - examples: - - operator: - pullPolicy: IfNotPresent - registry: docker.io - repository: to-be/defined - tag: v1.0.0 - properties: - operator: - default: - pullPolicy: IfNotPresent - registry: docker.io - repository: to-be/defined - tag: v1.0.0 - properties: - pullPolicy: - default: IfNotPresent - enum: - - Always - - Never - - IfNotPresent - type: string - registry: - default: docker.io - type: string - repository: - default: to-be/defined - type: string - tag: - default: v1.0.0 - type: string - type: object - type: object - domain_name: - default: your_company.com - examples: - - your_company.com - type: string - ingress_class: - default: traefik - examples: - - traefik - type: string - domain: - default: your-company - examples: - - your-company - type: string - sub_domain: - default: to-be-set - examples: - - to-be-set - type: string - issuer: - default: letsencrypt-prod - examples: - - letsencrypt-prod - type: string -dependencies: [] -providers: - kubernetes: true - authentik: true - kubectl: true - postgresql: null - restapi: null - http: null - gitea: null -tfaddtype: null diff --git a/monitor/thanos-ruler/monitoring.coreos.com_v1_ServiceMonitor_kube-prometheus-stack-thanos-ruler.yaml b/monitor/thanos-ruler/monitoring.coreos.com_v1_ServiceMonitor_kube-prometheus-stack-thanos-ruler.yaml deleted file mode 100644 index cd52cc8..0000000 --- a/monitor/thanos-ruler/monitoring.coreos.com_v1_ServiceMonitor_kube-prometheus-stack-thanos-ruler.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# Source: kube-prometheus-stack/templates/thanos-ruler/servicemonitor.yaml -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: kube-prometheus-stack-thanos-ruler - namespace: vynil-monitor - labels: - app: kube-prometheus-stack-thanos-ruler - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -spec: - - selector: - matchLabels: - app: kube-prometheus-stack-thanos-ruler - release: "prometheus-community" - self-monitor: "true" - namespaceSelector: - matchNames: - - "vynil-monitor" - endpoints: - - port: web - path: "/metrics" \ No newline at end of file diff --git a/monitor/thanos-ruler/monitoring.coreos.com_v1_ThanosRuler_kube-prometheus-stack-thanos-ruler.yaml b/monitor/thanos-ruler/monitoring.coreos.com_v1_ThanosRuler_kube-prometheus-stack-thanos-ruler.yaml deleted file mode 100644 index 9a722a3..0000000 --- a/monitor/thanos-ruler/monitoring.coreos.com_v1_ThanosRuler_kube-prometheus-stack-thanos-ruler.yaml +++ /dev/null @@ -1,39 +0,0 @@ -# Source: kube-prometheus-stack/templates/thanos-ruler/ruler.yaml -apiVersion: monitoring.coreos.com/v1 -kind: ThanosRuler -metadata: - name: kube-prometheus-stack-thanos-ruler - namespace: vynil-monitor - labels: - app: kube-prometheus-stack-thanos-ruler - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -spec: - image: "quay.io/thanos/thanos:v0.33.0" - replicas: 1 - listenLocal: false - serviceAccountName: kube-prometheus-stack-thanos-ruler - externalPrefix: http://kube-prometheus-stack-thanos-ruler.vynil-monitor:10902 - paused: false - logFormat: "logfmt" - logLevel: "info" - retention: "24h" - ruleNamespaceSelector: {} - ruleSelector: - matchLabels: - release: "prometheus-community" - - routePrefix: "/" - securityContext: - fsGroup: 2000 - runAsGroup: 2000 - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - portName: web \ No newline at end of file diff --git a/monitor/thanos-ruler/v1_Secret_kube-prometheus-stack-thanos-ruler.yaml b/monitor/thanos-ruler/v1_Secret_kube-prometheus-stack-thanos-ruler.yaml deleted file mode 100644 index 069e039..0000000 --- a/monitor/thanos-ruler/v1_Secret_kube-prometheus-stack-thanos-ruler.yaml +++ /dev/null @@ -1,17 +0,0 @@ -# Source: kube-prometheus-stack/templates/thanos-ruler/secret.yaml -apiVersion: v1 -kind: Secret -metadata: - name: kube-prometheus-stack-thanos-ruler - namespace: vynil-monitor - labels: - app: kube-prometheus-stack-thanos-ruler - - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -data: \ No newline at end of file diff --git a/monitor/thanos-ruler/v1_ServiceAccount_kube-prometheus-stack-thanos-ruler.yaml b/monitor/thanos-ruler/v1_ServiceAccount_kube-prometheus-stack-thanos-ruler.yaml deleted file mode 100644 index 197a80d..0000000 --- a/monitor/thanos-ruler/v1_ServiceAccount_kube-prometheus-stack-thanos-ruler.yaml +++ /dev/null @@ -1,18 +0,0 @@ ---- -# Source: kube-prometheus-stack/templates/thanos-ruler/serviceaccount.yaml -apiVersion: v1 -kind: ServiceAccount -metadata: - name: kube-prometheus-stack-thanos-ruler - namespace: vynil-monitor - labels: - app: kube-prometheus-stack-thanos-ruler - app.kubernetes.io/name: kube-prometheus-stack-thanos-ruler - app.kubernetes.io/component: thanos-ruler - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" \ No newline at end of file diff --git a/monitor/thanos-ruler/v1_Service_kube-prometheus-stack-thanos-ruler.yaml b/monitor/thanos-ruler/v1_Service_kube-prometheus-stack-thanos-ruler.yaml deleted file mode 100644 index 9e312ac..0000000 --- a/monitor/thanos-ruler/v1_Service_kube-prometheus-stack-thanos-ruler.yaml +++ /dev/null @@ -1,26 +0,0 @@ -# Source: kube-prometheus-stack/templates/thanos-ruler/service.yaml -apiVersion: v1 -kind: Service -metadata: - name: kube-prometheus-stack-thanos-ruler - namespace: vynil-monitor - labels: - app: kube-prometheus-stack-thanos-ruler - self-monitor: "true" - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: prometheus-community - app.kubernetes.io/version: "56.1.0" - app.kubernetes.io/part-of: kube-prometheus-stack - chart: kube-prometheus-stack-56.1.0 - release: "prometheus-community" - heritage: "Helm" -spec: - ports: - - name: web - port: 10902 - targetPort: 10902 - protocol: TCP - selector: - app.kubernetes.io/name: thanos-ruler - thanos-ruler: kube-prometheus-stack-thanos-ruler - type: "ClusterIP" \ No newline at end of file