create grafana dashboards into separate folders

This commit is contained in:
2025-11-07 22:34:44 +01:00
parent 8aeb823e50
commit bd8065b657
14 changed files with 15300 additions and 22 deletions

View File

@@ -2,6 +2,10 @@ ELASTIC_HOSTS=http://elasticsearch:9200
ELASTIC_PASSWORD=
KIBANA_PASSWORD=
GRAFANA_PASSWORD=
SLACK_WEBHOOK_URL=
DISCORD_WEBHOOK_URL=
DB_USERNAME=
DB_PASSWORD=
LOKI_S3_BUCKET=
LOKI_S3_ENDPOINT=

View File

@@ -0,0 +1,6 @@
---
# Names of the dashboard sub-folders. The directory-creation task loops over
# this list, and the per-folder copy tasks and Grafana file providers use the
# same four names.
dashboard_folders:
  - database
  - container
  - cache
  - server

View File

@@ -16,6 +16,12 @@
state: directory
mode: "0755"
# Host directory that receives the Grafana alerting provisioning files
# (contact points and alert rules) copied by a later task.
- name: Create grafana alerting directory on host
  file:
    state: directory
    path: /etc/grafana/alerting
    mode: "0755"
- name: Copy prometheus datasource config
template:
src: grafana-datasources.yml.j2
@@ -26,11 +32,42 @@
src: grafana-dashboards.yml.j2
dest: /etc/grafana/dashboards/grafana-dashboards.yml
# Create one sub-directory per entry of `dashboard_folders` below
# /etc/grafana/dashboards so every dashboard group has its own target folder
# before the per-folder copy tasks run.
- name: Ensure remote dashboard folders exist
  file:
    path: "/etc/grafana/dashboards/{{ item }}"
    state: directory
    mode: "0755"
  loop: "{{ dashboard_folders }}"
# Copy every dashboard JSON found in the local
# templates/grafana-dashboards/database directory into the matching host
# folder. Each file keeps its own basename; the `.j2` strip only matters for
# sources that carry that suffix, which the *.json glob never yields.
- name: Copy grafana dashboards - database
  copy:
    src: "{{ item }}"
    dest: /etc/grafana/dashboards/database/{{ item | basename | regex_replace('\.j2$', '') }}
  loop: "{{ query('fileglob', 'templates/grafana-dashboards/database/*.json') }}"
# Container dashboards: one copy per JSON file matched by the glob, written
# under /etc/grafana/dashboards/container with the source file's basename.
- name: Copy grafana dashboards - container
  loop: "{{ query('fileglob', 'templates/grafana-dashboards/container/*.json') }}"
  copy:
    dest: >-
      /etc/grafana/dashboards/container/{{ item | basename | regex_replace('\.j2$', '') }}
    src: "{{ item }}"
# Cache dashboards: one copy per JSON file matched by the glob, written under
# /etc/grafana/dashboards/cache with the source file's basename.
- name: Copy grafana dashboards - cache
  loop: "{{ query('fileglob', 'templates/grafana-dashboards/cache/*.json') }}"
  copy:
    dest: >-
      /etc/grafana/dashboards/cache/{{ item | basename | regex_replace('\.j2$', '') }}
    src: "{{ item }}"
# Server dashboards: one copy per JSON file matched by the glob, written under
# /etc/grafana/dashboards/server with the source file's basename.
- name: Copy grafana dashboards - server
  loop: "{{ query('fileglob', 'templates/grafana-dashboards/server/*.json') }}"
  copy:
    dest: >-
      /etc/grafana/dashboards/server/{{ item | basename | regex_replace('\.j2$', '') }}
    src: "{{ item }}"
# Place every *.j2 file from templates/grafana-alerting into
# /etc/grafana/alerting, dropping the trailing `.j2` from the file name.
# NOTE(review): `copy` transfers the files verbatim, so Jinja expressions
# inside these sources are not rendered. Confirm whether `template` was
# intended; if so, Grafana's own double-brace placeholders in the rule files
# would need raw-escaping first.
- name: Copy grafana alerting config
  loop: "{{ query('fileglob', 'templates/grafana-alerting/*.j2') }}"
  copy:
    dest: >-
      /etc/grafana/alerting/{{ item | basename | regex_replace('\.j2$', '') }}
    src: "{{ item }}"
- name: Pull Grafana Docker image
docker_image:

View File

@@ -0,0 +1,16 @@
# Grafana alerting provisioning: contact points.
# A single contact point delivers to two receivers: a Slack receiver and a
# generic webhook receiver that POSTs to the Discord webhook URL.
apiVersion: 1
contactPoints:
  - orgId: 1
    # Alert rules select this contact point by this exact name.
    name: 'Discord & Slack #alerts'
    receivers:
      - uid: beeuk6rhszaiof
        type: slack
        settings:
          # Quoted so the value stays a YAML string even when the substituted
          # text is empty or starts with a YAML indicator character.
          url: "{{ env_vars.SLACK_WEBHOOK_URL }}"
        disableResolveMessage: false
      - uid: eewvrxp9d14w0a
        type: webhook
        settings:
          httpMethod: POST
          url: "{{ env_vars.DISCORD_WEBHOOK_URL }}"
        disableResolveMessage: false

View File

@@ -0,0 +1,209 @@
# Grafana alerting provisioning: alert rules.
# One rule group in the `alerts` folder holding three rules: two disk-usage
# rules (database volume and root filesystem) and one CPU rule for the
# loadbalancer instance. Every rule notifies the 'Discord & Slack #alerts'
# contact point.
apiVersion: 1
groups:
  - orgId: 1
    # NOTE(review): the group is named "1m eval" but is evaluated every 10s;
    # confirm which of the two is intended.
    name: 1m eval
    folder: alerts
    interval: 10s
    rules:
      # Fires when the used percentage of /mnt/database_storage_volume stays
      # above 80 for one minute.
      - uid: cefvxm65auneod
        title: Disk Space Used - Database Storage
        condition: D
        data:
          - refId: B
            relativeTimeRange:
              from: 86400
              to: 0
            datasourceUid: PBFA97CFB590B2093
            model:
              adhocFilters: []
              datasource:
                type: prometheus
                uid: PBFA97CFB590B2093
              disableTextWrap: false
              editorMode: builder
              # Used percentage = 100 - available percentage.
              expr: 100 - (node_filesystem_avail_bytes{job="node_exporter", device!~"rootfs", mountpoint="/mnt/database_storage_volume"} * 100 / node_filesystem_size_bytes{job="node_exporter", device!~"rootfs", mountpoint="/mnt/database_storage_volume"})
              format: time_series
              fullMetaSearch: false
              includeNullMetadata: true
              instant: true
              interval: ""
              intervalFactor: 1
              intervalMs: 15000
              legendFormat: '{{mountpoint}}'
              maxDataPoints: 43200
              range: false
              refId: B
              step: 240
              useBackend: false
          # Threshold expression over query B.
          - refId: D
            datasourceUid: __expr__
            model:
              conditions:
                - evaluator:
                    params:
                      - 80
                      - 0
                    type: gt
                  operator:
                    type: and
                  query:
                    params: []
                  reducer:
                    params: []
                    type: avg
                  type: query
              datasource:
                name: Expression
                type: __expr__
                uid: __expr__
              expression: B
              intervalMs: 1000
              maxDataPoints: 43200
              refId: D
              type: threshold
        dashboardUid: rYdddlPWk
        panelId: 152
        noDataState: NoData
        execErrState: Error
        for: 1m
        annotations:
          __dashboardUid__: rYdddlPWk
          __panelId__: "152"
        labels: {}
        isPaused: false
        notification_settings:
          receiver: 'Discord & Slack #alerts'
      # Fires when the used percentage of the root filesystem stays above 80
      # for one minute.
      - uid: ff1ifyc0gczr4e
        title: Disk Space Used - Root FS
        condition: C
        data:
          - refId: A
            relativeTimeRange:
              from: 86400
              to: 0
            datasourceUid: PBFA97CFB590B2093
            model:
              adhocFilters: []
              datasource:
                type: prometheus
                uid: PBFA97CFB590B2093
              disableTextWrap: false
              editorMode: builder
              expr: 100 - (node_filesystem_avail_bytes{job="node_exporter", device!~"rootfs", mountpoint="/"} * 100 / node_filesystem_size_bytes{job="node_exporter", device!~"rootfs", mountpoint="/"})
              format: time_series
              fullMetaSearch: false
              includeNullMetadata: true
              instant: true
              interval: ""
              intervalFactor: 1
              intervalMs: 15000
              legendFormat: '{{mountpoint}}'
              maxDataPoints: 43200
              range: false
              refId: A
              step: 240
              useBackend: false
          # Threshold expression over query A.
          - refId: C
            datasourceUid: __expr__
            model:
              conditions:
                - evaluator:
                    params:
                      - 80
                    type: gt
                  operator:
                    type: and
                  query:
                    params:
                      - C
                  reducer:
                    params: []
                    type: last
                  type: query
              datasource:
                type: __expr__
                uid: __expr__
              expression: A
              intervalMs: 1000
              maxDataPoints: 43200
              refId: C
              type: threshold
        dashboardUid: rYdddlPWk
        panelId: 152
        noDataState: NoData
        execErrState: Error
        for: 1m
        annotations:
          __dashboardUid__: rYdddlPWk
          __panelId__: "152"
        isPaused: false
        notification_settings:
          receiver: 'Discord & Slack #alerts'
      # CPU rule for the loadbalancer:9100 instance.
      # NOTE(review): the query appears to produce the average idle rate per
      # CPU, while the evaluator is `gt 2`, the title says "Idle low
      # threshold" and the description speaks of usage above 80% for five
      # minutes (no `for` is set). Confirm the intended evaluator, threshold
      # and wording.
      - uid: ef1ir88aq1tz4f
        title: Web CPU - Idle low threshold
        condition: value
        data:
          - refId: A
            relativeTimeRange:
              from: 900
              to: 0
            datasourceUid: PBFA97CFB590B2093
            model:
              adhocFilters: []
              datasource:
                type: prometheus
                uid: PBFA97CFB590B2093
              disableTextWrap: false
              editorMode: code
              expr: round(sum(irate(node_cpu_seconds_total{instance="loadbalancer:9100", job="node_exporter", mode="idle"}[$__rate_interval])) / scalar(count(count by(cpu) (node_cpu_seconds_total{instance="loadbalancer:9100", job="node_exporter"}))), 0.01)
              format: time_series
              fullMetaSearch: false
              includeNullMetadata: true
              instant: true
              interval: ""
              intervalFactor: 1
              intervalMs: 1000
              legendFormat: __auto
              maxDataPoints: 43200
              range: false
              refId: A
              step: 240
              useBackend: false
          # Threshold expression over query A.
          - refId: value
            datasourceUid: __expr__
            model:
              conditions:
                - evaluator:
                    params:
                      - 2
                    type: gt
                  operator:
                    type: and
                  query:
                    params:
                      - I
                  reducer:
                    params: []
                    type: last
                  type: query
              datasource:
                type: __expr__
                uid: __expr__
              expression: A
              intervalMs: 1000
              maxDataPoints: 43200
              refId: value
              type: threshold
        dashboardUid: rYdddlPWk
        panelId: 3
        noDataState: NoData
        execErrState: Error
        annotations:
          __dashboardUid__: rYdddlPWk
          __panelId__: "3"
          description: CPU usage for {{ $labels.instance }} has exceeded 80% ({{ $values.A.Value }}) for the last 5 minutes.
          summary: A web server is under high-load!
        isPaused: false
        notification_settings:
          # Must name a provisioned contact point; the other two rules use
          # this same receiver.
          receiver: 'Discord & Slack #alerts'

View File

@@ -1,24 +1,26 @@
# Grafana dashboard provisioning: one file provider per dashboard folder.
# Each provider loads the dashboards found under its own `options.path` into
# the Grafana folder named by `folder`. Settings not listed here fall back to
# Grafana's defaults.
# NOTE(review): the paths are the locations as seen by Grafana; presumably the
# host's /etc/grafana/dashboards tree is mounted there - confirm against the
# container's volume configuration.
apiVersion: 1
providers:
  - name: Database
    folder: database
    type: file
    options:
      path: /etc/grafana/provisioning/dashboards/database

  - name: Container
    folder: container
    type: file
    options:
      path: /etc/grafana/provisioning/dashboards/container

  - name: Cache
    folder: cache
    type: file
    options:
      path: /etc/grafana/provisioning/dashboards/cache

  - name: Server
    folder: server
    type: file
    options:
      path: /etc/grafana/provisioning/dashboards/server

View File

@@ -0,0 +1,780 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"description": "Simple exporter for cadvisor only",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 6,
"links": [],
"panels": [
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 8,
"panels": [],
"title": "CPU",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percent"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 1
},
"id": 15,
"options": {
"alertThreshold": true,
"legend": {
"calcs": [
"mean",
"max"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "11.5.2",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"expr": "sum(rate(container_cpu_usage_seconds_total{instance=~\"$host\",name=~\"$container\",name=~\".+\"}[5m])) by (name) *100",
"hide": false,
"interval": "",
"legendFormat": "{{name}}",
"refId": "A"
}
],
"title": "CPU Usage",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 8
},
"id": 11,
"panels": [],
"title": "Memory",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 9
},
"id": 9,
"options": {
"alertThreshold": true,
"legend": {
"calcs": [
"mean",
"max"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "11.5.2",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"expr": "sum(container_memory_rss{instance=~\"$host\",name=~\"$container\",name=~\".+\"}) by (name)",
"hide": false,
"interval": "",
"legendFormat": "{{name}}",
"refId": "A"
}
],
"title": "Memory Usage",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 9
},
"id": 14,
"options": {
"alertThreshold": true,
"legend": {
"calcs": [
"mean",
"max"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "11.5.2",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"expr": "sum(container_memory_cache{instance=~\"$host\",name=~\"$container\",name=~\".+\"}) by (name)",
"hide": false,
"interval": "",
"legendFormat": "{{name}}",
"refId": "A"
}
],
"title": "Memory Cached",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 17
},
"id": 2,
"panels": [],
"title": "Network",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "Bps"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 18
},
"id": 4,
"options": {
"alertThreshold": true,
"legend": {
"calcs": [
"mean",
"max"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "11.5.2",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"expr": "sum(rate(container_network_receive_bytes_total{instance=~\"$host\",name=~\"$container\",name=~\".+\"}[5m])) by (name)",
"hide": false,
"interval": "",
"legendFormat": "{{name}}",
"refId": "A"
}
],
"title": "Received Network Traffic",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "Bps"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 18
},
"id": 6,
"options": {
"alertThreshold": true,
"legend": {
"calcs": [
"mean",
"max"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "11.5.2",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"expr": "sum(rate(container_network_transmit_bytes_total{instance=~\"$host\",name=~\"$container\",name=~\".+\"}[5m])) by (name)",
"interval": "",
"legendFormat": "{{name}}",
"refId": "A"
}
],
"title": "Sent Network Traffic",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 26
},
"id": 19,
"panels": [],
"title": "Misc",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"custom": {
"filterable": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "id"
},
"properties": [
{
"id": "custom.width",
"value": 260
}
]
},
{
"matcher": {
"id": "byName",
"options": "Running"
},
"properties": [
{
"id": "unit",
"value": "d"
},
{
"id": "decimals",
"value": 1
},
{
"id": "custom.cellOptions",
"value": {
"type": "color-text"
}
},
{
"id": "color",
"value": {
"fixedColor": "dark-green",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 10,
"w": 24,
"x": 0,
"y": 27
},
"id": 17,
"options": {
"showHeader": true,
"sortBy": []
},
"pluginVersion": "7.4.5",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"expr": "(time() - container_start_time_seconds{instance=~\"$host\",name=~\"$container\",name=~\".+\"})/86400",
"format": "table",
"instant": true,
"interval": "",
"legendFormat": "{{name}}",
"refId": "A"
}
],
"title": "Containers Info",
"transformations": [
{
"id": "filterFieldsByName",
"options": {
"include": {
"names": [
"container_label_com_docker_compose_project",
"container_label_com_docker_compose_project_working_dir",
"image",
"instance",
"name",
"Value",
"container_label_com_docker_compose_service"
]
}
}
},
{
"id": "organize",
"options": {
"excludeByName": {},
"indexByName": {},
"renameByName": {
"Value": "Running",
"container_label_com_docker_compose_project": "Label",
"container_label_com_docker_compose_project_working_dir": "Working dir",
"container_label_com_docker_compose_service": "Service",
"image": "Registry Image",
"instance": "Instance",
"name": "Name"
}
}
}
],
"type": "table"
}
],
"preload": false,
"refresh": "",
"schemaVersion": 40,
"tags": [
"cadvisor",
"docker"
],
"templating": {
"list": [
{
"allValue": ".*",
"current": {
"text": "All",
"value": "$__all"
},
"datasource": "PBFA97CFB590B2093",
"definition": "label_values({__name__=~\"container.*\"},instance)",
"includeAll": true,
"label": "Host",
"name": "host",
"options": [],
"query": {
"query": "label_values({__name__=~\"container.*\"},instance)",
"refId": "Prometheus-host-Variable-Query"
},
"refresh": 1,
"regex": "",
"sort": 5,
"type": "query"
},
{
"allValue": ".*",
"current": {
"text": "All",
"value": "$__all"
},
"datasource": "PBFA97CFB590B2093",
"definition": "label_values({__name__=~\"container.*\", instance=~\"$host\"},name)",
"includeAll": true,
"label": "Container",
"name": "container",
"options": [],
"query": {
"query": "label_values({__name__=~\"container.*\", instance=~\"$host\"},name)",
"refId": "Prometheus-container-Variable-Query"
},
"refresh": 1,
"regex": "",
"type": "query"
}
]
},
"time": {
"from": "now-24h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Cadvisor exporter",
"uid": "pMEd7m0Mz",
"version": 2,
"weekStart": ""
}

View File

@@ -48,3 +48,11 @@ scrape_configs:
# cAdvisor job: scrapes the single static target elasticsearch:8080.
- job_name: cadvisor
  static_configs:
    - targets:
        - 'elasticsearch:8080'
# MySQL job: requests /probe on database:9104. The relabel rule sets the
# `target` URL parameter of each scrape request to localhost:3306.
- job_name: mysql
  metrics_path: /probe
  static_configs:
    - targets:
        - 'database:9104'
  relabel_configs:
    - replacement: 'localhost:3306'
      target_label: __param_target