diff --git a/docker/compose/validator-prod/alertmanager/alertmanager.yml b/docker/compose/validator-prod/alertmanager/alertmanager.yml new file mode 100644 index 0000000000000000000000000000000000000000..f72951393486b0e986ac0b0032b6c669ee499745 --- /dev/null +++ b/docker/compose/validator-prod/alertmanager/alertmanager.yml @@ -0,0 +1,16 @@ +global: + resolve_timeout: 1m + +route: + receiver: 'mail-notifications' + +receivers: +- name: 'mail-notifications' + email_configs: + - to: 'mydedicatednodealertaddress@domain.tld' + from: 'sender@domain.tld' + smarthost: 'smtp.server.tld:587' + auth_username: 'sender@domain.tld' + auth_identity: 'sender@domain.tld' + auth_password: 'yrymyemufalyjing' + send_resolved: true diff --git a/docker/compose/validator-prod/docker-compose.yml b/docker/compose/validator-prod/docker-compose.yml new file mode 100644 index 0000000000000000000000000000000000000000..a09c4fb363ae66e32ceeefde1eb020566f779c10 --- /dev/null +++ b/docker/compose/validator-prod/docker-compose.yml @@ -0,0 +1,93 @@ +version: "3.4" + +services: + duniter-validator: + image: duniter/duniter-v2s:sha-142d4763 + restart: unless-stopped + ports: + - 127.0.0.1:9615:9615 + - 127.0.0.1:9933:9933 + - 127.0.0.1:9944:9944 + - 30333:30333 + volumes: + - ./node.key:/etc/duniter/node.key + - duniter-validator-data:/var/lib/duniter/ + environment: + - DUNITER_CHAIN_NAME=gdev + command: + - "--execution" + - "Wasm" + - "--node-key-file" + - "/etc/duniter/node-key" + - "--public-addr" + # SERVER_DOMAIN should be replaced by a domain name that point on your server + # + # The PEER_ID should be replaced by the output of this command: + # docker run --rm -it --entrypoint duniter -v $PWD:/var/lib/duniter/ duniter/duniter-v2s:sha-142d4763 key generate-node-key --file /var/lib/duniter/node.key + - "/dns/SERVER_DOMAIN/tcp/30333/p2p/PEER_ID" + - "--rpc-methods=Unsafe" + - "--validator" + - "--rpc-cors" + - "all" + - "--prometheus-external" + + prometheus: + image: prom/prometheus:v2.30.3 + restart: unless-stopped + ports: + - 9000:9090 + volumes: + - ./prometheus:/etc/prometheus + - prometheus-data:/prometheus + command: --web.enable-lifecycle --config.file=/etc/prometheus/prometheus.yml + extra_hosts: + - "host.docker.internal:host-gateway" + + alertmanager: + image: prom/alertmanager:v0.24.0 + restart: unless-stopped + ports: + - 127.0.0.1:9093:9093 + volumes: + - ./alertmanager:/etc/alertmanager + - alertmanager-data:/alertmanager + command: --config.file=/etc/alertmanager/alertmanager.yml + + process-exporter: + image: ncabatoff/process-exporter + restart: unless-stopped + privileged: True + ports: + - 127.0.0.1:9256:9256 + volumes: + - /proc:/host/proc + - ./process-exporter:/config + command: --procfs /host/proc -config.path /config/config.yml + + node_exporter: + image: quay.io/prometheus/node-exporter:latest + container_name: node_exporter + command: + - '--path.rootfs=/host' + network_mode: host + pid: host + restart: unless-stopped + volumes: + - '/:/host:ro,rslave' + + grafana: + image: grafana/grafana:8.5.4 + restart: unless-stopped + ports: + - 127.0.0.1:3000:3000 + volumes: + - ./grafana/provisioning/datasources:/etc/grafana/provisioning/datasources + - grafana-data:/var/lib/grafana + environment: + - GF_INSTALL_PLUGINS=camptocamp-prometheus-alertmanager-datasource + +volumes: + alertmanager-data: + duniter-validator-data: + grafana-data: + prometheus-data: diff --git a/docker/compose/validator-prod/grafana/provisioning/datasources/prometheus_ds.yml b/docker/compose/validator-prod/grafana/provisioning/datasources/prometheus_ds.yml new file mode 100644 index 0000000000000000000000000000000000000000..2842d119c8e21df4998295e12eedf373d4c64fb9 --- /dev/null +++ b/docker/compose/validator-prod/grafana/provisioning/datasources/prometheus_ds.yml @@ -0,0 +1,10 @@ +datasources: + - name: Prometheus + access: proxy + type: prometheus + url: http://prometheus:9090 + isDefault: true + - name: Prometheus AlertManager + access: proxy + type: camptocamp-prometheus-alertmanager-datasource + url: http://alertmanager:9093 diff --git a/docker/compose/validator-prod/grafana/templates/polkadot-essentials-2021.json b/docker/compose/validator-prod/grafana/templates/polkadot-essentials-2021.json new file mode 100644 index 0000000000000000000000000000000000000000..921f468eb7df9f26bad64b99e96c4547c3b0c8a0 --- /dev/null +++ b/docker/compose/validator-prod/grafana/templates/polkadot-essentials-2021.json @@ -0,0 +1,3671 @@ +{ + "annotations": { + "list": [ + { + "$$hashKey": "object:15", + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "limit": 100, + "name": "Annotations & Alerts", + "showIn": 0, + "type": "dashboard" + } + ] + }, + "description": "Server and Substrate chain main metrics, suitable for a node monitoring in 2021 - \r\nTutorial: https://medium.com/bld-nodes/monitoring-substrate-node-polkadot-kusama-parachains-validator-guide-922734ea4cdb", + "editable": true, + "gnetId": 13840, + "graphTooltip": 0, + "id": 1, + "iteration": 1654016981360, + "links": [ + { + "icon": "external link", + "tags": [], + "targetBlank": true, + "title": "Polkastats.io", + "tooltip": "", + "type": "link", + "url": "https://polkastats.io" + }, + { + "icon": "external link", + "tags": [], + "title": "Telemetry", + "type": "link", + "url": "https://telemetry.polkadot.io" + }, + { + "icon": "external link", + "tags": [], + "title": "Polkadot JS app", + "type": "link", + "url": "https://polkadot.js.org/apps/#/explorer" + } + ], + "panels": [ + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": ["#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a"], + "datasource": "Prometheus AlertManager", + "decimals": null, + "description": "", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 0, + "y": 0 + }, + "id": 84, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "annotations": false, + "expr": "", + "labelSelector": "*", + "legendFormat": "", + "refId": "A", + "target": "Query", + "type": "single" + } + ], + "thresholds": "1,5", + "title": "All alerts", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + }, + { + "columns": [], + "datasource": "Prometheus AlertManager", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fontSize": "100%", + "gridPos": { + "h": 3, + "w": 21, + "x": 3, + "y": 0 + }, + "id": 86, + "links": [], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "styles": [ + { + "alias": "Time", + "align": "auto", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "", + "align": "auto", + "colorMode": "row", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "severity", + "thresholds": ["1", "2"], + "type": "string", + "unit": "short", + "valueMaps": [ + { + "text": "Critical", + "value": "3" + }, + { + "text": "Warning", + "value": "2" + }, + { + "text": "Info", + "value": "1" + } + ] + }, + { + "alias": "", + "align": "auto", + "colorMode": "row", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": ["0"], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "annotations": true, + "expr": "", + "hide": false, + "labelSelector": "*", + "legendFormat": "{{ summary }}", + "refId": "A", + "target": "Query", + "type": "table" + }, + { + "annotations": false, + "expr": "", + "labelSelector": "*", + "legendFormat": "{{ instance }}", + "refId": "B", + "target": "Query", + "type": "table" + } + ], + "title": "All Alerts Info", + "transform": "table", + "type": "table-old" + }, + { + "collapsed": false, + "datasource": "Prometheus", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 3 + }, + "id": 50, + "panels": [], + "title": "Server Health", + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Busy state of all CPU cores together", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "id": 0, + "op": "=", + "text": "N/A", + "type": 1, + "value": "null" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 85 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 95 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 0, + "y": 4 + }, + "id": 54, + "links": [], + "options": { + "orientation": "horizontal", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "7.5.7", + "targets": [ + { + "expr": "(((count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu))) - avg(sum by (mode)(irate(node_cpu_seconds_total{mode='idle',instance=\"$node\",job=\"$job\"}[5m])))) * 100) / count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu))", + "hide": false, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 900 + } + ], + "title": "CPU Busy", + "type": "gauge" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Busy state of all CPU cores together (5 min average)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "id": 0, + "op": "=", + "text": "N/A", + "type": 1, + "value": "null" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 85 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 95 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 3, + "y": 4 + }, + "id": 56, + "links": [], + "options": { + "orientation": "horizontal", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "7.5.7", + "targets": [ + { + "expr": "avg(node_load5{instance=\"$node\",job=\"$job\"}) / count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)) * 100", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "A", + "step": 900 + } + ], + "title": "Sys Load (5m avg)", + "type": "gauge" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Busy state of all CPU cores together (15 min average)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "id": 0, + "op": "=", + "text": "N/A", + "type": 1, + "value": "null" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 85 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 95 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 6, + "y": 4 + }, + "id": 58, + "links": [], + "options": { + "orientation": "horizontal", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "7.5.7", + "targets": [ + { + "expr": "avg(node_load15{instance=\"$node\",job=\"$job\"}) / count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)) * 100", + "hide": false, + "intervalFactor": 1, + "refId": "A", + "step": 900 + } + ], + "title": "Sys Load (15m avg)", + "type": "gauge" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Non available RAM memory", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 80 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 9, + "y": 4 + }, + "hideTimeOverride": false, + "id": 60, + "links": [], + "options": { + "orientation": "horizontal", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "7.5.7", + "targets": [ + { + "expr": "((node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"}) / (node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"} )) * 100", + "format": "time_series", + "hide": true, + "intervalFactor": 1, + "refId": "A", + "step": 900 + }, + { + "expr": "100 - ((node_memory_MemAvailable_bytes{instance=\"$node\",job=\"$job\"} * 100) / node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"})", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "B", + "step": 900 + } + ], + "title": "RAM Used", + "type": "gauge" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Used Swap", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "id": 0, + "op": "=", + "text": "N/A", + "type": 1, + "value": "null" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 10 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 25 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 12, + "y": 4 + }, + "id": 62, + "links": [], + "options": { + "orientation": "horizontal", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "7.5.7", + "targets": [ + { + "expr": "((node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapFree_bytes{instance=\"$node\",job=\"$job\"}) / (node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} )) * 100", + "intervalFactor": 1, + "refId": "A", + "step": 900 + } + ], + "title": "SWAP Used", + "type": "gauge" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Used Root FS", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [ + { + "id": 0, + "op": "=", + "text": "N/A", + "type": 1, + "value": "null" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 80 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 15, + "y": 4 + }, + "id": 64, + "links": [], + "options": { + "orientation": "horizontal", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "7.5.7", + "targets": [ + { + "expr": "100 - ((node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",mountpoint=\"/\",fstype!=\"rootfs\"} * 100) / node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",mountpoint=\"/\",fstype!=\"rootfs\"})", + "format": "time_series", + "intervalFactor": 1, + "refId": "A", + "step": 900 + } + ], + "title": "Root FS Used", + "type": "gauge" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Prometheus", + "description": "Total number of CPU cores", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 18, + "y": 4 + }, + "id": 66, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "maxPerRow": 6, + "nullPointMode": "null", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu))", + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 900 + } + ], + "thresholds": "", + "title": "CPU Cores", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Prometheus", + "decimals": 1, + "description": "System uptime", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "format": "s", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 4, + "x": 20, + "y": 4 + }, + "hideTimeOverride": true, + "id": 70, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "$$hashKey": "object:1094", + "name": "value to text", + "value": 1 + }, + { + "$$hashKey": "object:1095", + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "null", + "nullText": null, + "postfix": "s", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "node_time_seconds{instance=\"$node\",job=\"$job\"} - node_boot_time_seconds{instance=\"$node\",job=\"$job\"}", + "intervalFactor": 2, + "refId": "A", + "step": 1800 + } + ], + "thresholds": "", + "title": "Uptime", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "$$hashKey": "object:1097", + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "Prometheus", + "decimals": 0, + "description": "Total RootFS", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 18, + "y": 6 + }, + "id": 68, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "maxPerRow": 6, + "nullPointMode": "null", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",mountpoint=\"/\",fstype!=\"rootfs\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "A", + "step": 900 + } + ], + "thresholds": "70,90", + "title": "RootFS Total", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Prometheus", + "decimals": 0, + "description": "Total RAM", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 20, + "y": 6 + }, + "id": 82, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "maxPerRow": 6, + "nullPointMode": "null", + "nullText": null, + "postfix": "", + "postfixFontSize": "70%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"}", + "intervalFactor": 1, + "refId": "A", + "step": 900 + } + ], + "thresholds": "", + "title": "RAM Total", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Prometheus", + "decimals": 0, + "description": "Total SWAP", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 22, + "y": 6 + }, + "id": 74, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "maxPerRow": 6, + "nullPointMode": "null", + "nullText": null, + "postfix": "", + "postfixFontSize": "70%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"}", + "intervalFactor": 1, + "refId": "A", + "step": 900 + } + ], + "thresholds": "", + "title": "SWAP Total", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": { + "Busy": "#EAB839", + "Busy Iowait": "#890F02", + "Busy other": "#1F78C1", + "Idle": "#052B51", + "Idle - Waiting for something to happen": "#052B51", + "guest": "#9AC48A", + "idle": "#052B51", + "iowait": "#EAB839", + "irq": "#BF1B00", + "nice": "#C15C17", + "softirq": "#E24D42", + "steal": "#FCE2DE", + "system": "#508642", + "user": "#5195CE" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "description": "Basic CPU info", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 4, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 8 + }, + "hiddenSeries": false, + "id": 76, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": 250, + "sort": null, + "sortDesc": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": true, + "pluginVersion": "7.5.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:3565", + "alias": "Busy Iowait", + "color": "#890F02" + }, + { + "$$hashKey": "object:3566", + "alias": "Idle", + "color": "#7EB26D" + }, + { + "$$hashKey": "object:3567", + "alias": "Busy System", + "color": "#EAB839" + }, + { + "$$hashKey": "object:3568", + "alias": "Busy User", + "color": "#0A437C" + }, + { + "$$hashKey": "object:3569", + "alias": "Busy Other", + "color": "#B877D9" + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum by (instance)(irate(node_cpu_seconds_total{mode=\"system\",instance=\"$node\",job=\"$job\"}[5m])) * 100", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Busy System", + "refId": "A", + "step": 240 + }, + { + "expr": "sum by (instance)(irate(node_cpu_seconds_total{mode='user',instance=\"$node\",job=\"$job\"}[5m])) * 100", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Busy User", + "refId": "B", + "step": 240 + }, + { + "expr": "sum by (instance)(irate(node_cpu_seconds_total{mode='iowait',instance=\"$node\",job=\"$job\"}[5m])) * 100", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Busy Iowait", + "refId": "C", + "step": 240 + }, + { + "expr": "sum by (instance)(irate(node_cpu_seconds_total{mode=~\".*irq\",instance=\"$node\",job=\"$job\"}[5m])) * 100", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Busy IRQs", + "refId": "D", + "step": 240 + }, + { + "expr": "sum (irate(node_cpu_seconds_total{mode!='idle',mode!='user',mode!='system',mode!='iowait',mode!='irq',mode!='softirq',instance=\"$node\",job=\"$job\"}[5m])) * 100", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Busy Other", + "refId": "E", + "step": 240 + }, + { + "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='idle',instance=\"$node\",job=\"$job\"}[5m])) * 100", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Idle", + "refId": "F", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Basic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:3600", + "format": "short", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "$$hashKey": "object:3601", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Apps": "#629E51", + "Buffers": "#614D93", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Free": "#0A437C", + "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF", + "Inactive": "#584477", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "RAM_Free": "#E0F9D7", + "SWAP Used": "#BF1B00", + "Slab": "#806EB7", + "Slab_Cache": "#E0752D", + "Swap": "#BF1B00", + "Swap Used": "#BF1B00", + "Swap_Cache": "#C15C17", + "Swap_Free": "#2F575E", + "Unused": "#EAB839" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "description": "Basic memory usage", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 4, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 8 + }, + "hiddenSeries": false, + "id": 78, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": 350, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:4023", + "alias": "RAM Total", + "color": "#E0F9D7", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:4024", + "alias": "RAM Cache + Buffer", + "color": "#052B51" + }, + { + "$$hashKey": "object:4025", + "alias": "RAM Free", + "color": "#7EB26D" + }, + { + "$$hashKey": "object:4026", + "alias": "Avaliable", + "color": "#DEDAF7", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:4083", + "alias": "Polkadot", + "color": "rgb(247, 0, 222)", + "stack": false + }, + { + "$$hashKey": "object:5303", + "alias": "RAM Used", + "color": "#FADE2A" + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "namedprocess_namegroup_memory_bytes{groupname=\"[[metric_namespace]]\",memtype=\"resident\"}", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Polkadot", + "refId": "F" + }, + { + "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "RAM Total", + "refId": "A", + "step": 240 + }, + { + "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"} - (node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"} + node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"})", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "RAM Used", + "refId": "B", + "step": 240 + }, + { + "expr": "node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"} + node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "RAM Cache + Buffer", + "refId": "C", + "step": 240 + }, + { + "expr": "node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "RAM Free", + "refId": "D", + "step": 240 + }, + { + "expr": "(node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapFree_bytes{instance=\"$node\",job=\"$job\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "SWAP Used", + "refId": "E", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Basic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:4059", + "format": "bytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:4060", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Recv_bytes_eth2": "#7EB26D", + "Recv_bytes_lo": "#0A50A1", + "Recv_drop_eth2": "#6ED0E0", + "Recv_drop_lo": "#E0F9D7", + "Recv_errs_eth2": "#BF1B00", + "Recv_errs_lo": "#CCA300", + "Trans_bytes_eth2": "#7EB26D", + "Trans_bytes_lo": "#0A50A1", + "Trans_drop_eth2": "#6ED0E0", + "Trans_drop_lo": "#E0F9D7", + "Trans_errs_eth2": "#BF1B00", + "Trans_errs_lo": "#CCA300", + "recv_bytes_lo": "#0A50A1", + "recv_drop_eth0": "#99440A", + "recv_drop_lo": "#967302", + "recv_errs_eth0": "#BF1B00", + "recv_errs_lo": "#890F02", + "trans_bytes_eth0": "#7EB26D", + "trans_bytes_lo": "#0A50A1", + "trans_drop_eth0": "#99440A", + "trans_drop_lo": "#967302", + "trans_errs_eth0": "#BF1B00", + "trans_errs_lo": "#890F02" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "Basic network info per interface", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 4, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 8 + }, + "hiddenSeries": false, + "id": 80, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:1019", + "alias": "/.*trans.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_network_receive_bytes_total{instance=\"$node\",job=\"$job\"}[5m])*8", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "recv {{device}}", + "refId": "A", + "step": 240 + }, + { + "expr": "irate(node_network_transmit_bytes_total{instance=\"$node\",job=\"$job\"}[5m])*8", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "trans {{device}} ", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network Traffic Basic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:1065", + "format": "bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:1066", + "format": "pps", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "io time": "#890F02" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 3, + "description": "", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 4, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 8 + }, + "hiddenSeries": false, + "id": 88, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": null, + "sortDesc": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_disk_io_time_seconds_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"} [5m])", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{device}} - Time spent doing I/Os", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "I/O Usage Times", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:3764", + "format": "s", + "label": "time", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:3765", + "format": "s", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": "Prometheus", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 15 + }, + "id": 52, + "panels": [], + "title": "Chain", + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [ + { + "title": "", + "url": "" + } + ], + "mappings": [ + { + "id": 0, + "op": "=", + "text": "Stuck", + "type": 1, + "value": "1492283" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgb(222, 101, 171)", + "value": null + } + ] + }, + "unit": "locale" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 11, + "x": 0, + "y": 16 + }, + "id": 26, + "links": [], + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "center", + "orientation": "vertical", + "reduceOptions": { + "calcs": ["last"], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "7.5.7", + "targets": [ + { + "expr": "[[metric_namespace]]_block_height{instance=\"[[instance]]\",network=\"[[network]]\"}", + "interval": "", + "legendFormat": "{{status}}", + "refId": "B" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Height of the chain", + "type": "stat" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 4, + "w": 6, + "x": 11, + "y": 16 + }, + "hiddenSeries": false, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.7", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "[[metric_namespace]]_block_height{instance=\"[[instance]]\",network=\"[[network]]\",status=~\"finalized|sync_target\"}", + "legendFormat": "{{instance}} {{status}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Block Finalized", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:1885", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:1886", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "id": 0, + "op": "=", + "text": "N/A", + "type": 1, + "value": "null" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "semi-dark-blue", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 7, + "x": 17, + "y": 16 + }, + "id": 28, + "links": [], + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "vertical", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "7.5.7", + "targets": [ + { + "expr": "rate([[metric_namespace]]_sub_libp2p_network_bytes_total{instance=\"[[instance]]\",network=\"[[network]]\"}[5m])", + "instant": false, + "interval": "", + "legendFormat": "{{direction}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Networking bytes per second", + "type": "stat" + }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 20 + }, + "hiddenSeries": false, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.7", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate([[metric_namespace]]_block_height{status=\"finalized\",instance=\"[[instance]]\",network=\"[[network]]\"}[10m])*60", + "interval": "", + "intervalFactor": 10, + "legendFormat": "{{instance}} Blocks / minute", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Block rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:1800", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:1801", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 20 + }, + "hiddenSeries": false, + "id": 20, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.7", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "data": "", + "expr": "[[metric_namespace]]_ready_transactions_number{instance=\"[[instance]]\",network=\"[[network]]\"}", + "hide": false, + "legendFormat": "{{instance}}", + "refId": "A", + "target": "txcount", + "type": "timeseries" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "TXs Count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:2138", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:2139", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 20 + }, + "hiddenSeries": false, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.7", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "[[metric_namespace]]_sub_libp2p_peers_count{instance=\"[[instance]]\",network=\"[[network]]\"}", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Peers count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:2223", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:2224", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 20 + }, + "hiddenSeries": false, + "id": 36, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.7", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "[[metric_namespace]]_state_cache_bytes{instance=\"[[instance]]\",network=\"[[network]]\"}", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "State Cache", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:1295", + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:1296", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 26 + }, + "hiddenSeries": false, + "id": 34, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.7", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "[[metric_namespace]]_sync_queued_blocks{instance=\"[[instance]]\",network=\"[[network]]\"}", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Queued Blocks (sync)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:1970", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:1971", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 26 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.7", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate([[metric_namespace]]_block_height{status=\"finalized\",instance=\"[[instance]]\",network=\"[[network]]\"}[10m])/rate([[metric_namespace]]_block_height{status=\"finalized\",instance=\"[[instance]]\",network=\"[[network]]\"}[1m])", + "intervalFactor": 1, + "legendFormat": "rate[10m] / rate[1m]", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Relative Block Production Speed", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:410", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:411", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 26 + }, + "hiddenSeries": false, + "id": 10, + "interval": "", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.7", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "increase([[metric_namespace]]_block_height{instance=\"[[instance]]\",network=\"[[network]]\",status=~\"finalized|sync_target\"}[1m])", + "intervalFactor": 5, + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Blocks Av per min", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:2972", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:2973", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 26 + }, + "hiddenSeries": false, + "id": 17, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.7", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "scalar([[metric_namespace]]_block_height{status=\"best\",instance=\"[[instance]]\",network=\"[[network]]\"})-scalar([[metric_namespace]]_block_height{status=\"finalized\",instance=\"[[instance]]\",network=\"[[network]]\"})", + "intervalFactor": 2, + "legendFormat": "[[hostname]]", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Diff -> ( Best Block - Finalized )", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:3057", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:3058", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 32 + }, + "hiddenSeries": false, + "id": 90, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.7", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate([[metric_namespace]]_tasks_polling_started_total{instance=\"[[instance]]\",network=\"[[network]]\"}[1m]) - rate([[metric_namespace]]_tasks_polling_duration_count{instance=\"[[instance]]\",network=\"[[network]]\"}[1m])", + "interval": "", + "legendFormat": "{{task_name}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Running Tasks", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:956", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:957", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 32 + }, + "hiddenSeries": false, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.7", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "[[metric_namespace]]_sub_libp2p_connections{instance=\"[[instance]]\",network=\"[[network]]\"}", + "hide": false, + "interval": "", + "legendFormat": "{{instance}} connections", + "refId": "A" + }, + { + "expr": "[[metric_namespace]]_sub_libp2p_is_major_syncing{instance=\"[[instance]]\",network=\"[[network]]\"}", + "hide": false, + "legendFormat": "{{instance}} syncing", + "refId": "B" + }, + { + "expr": "[[metric_namespace]]_sub_libp2p_kbuckets_num_nodes{instance=\"[[instance]]\",network=\"[[network]]\"}", + "hide": false, + "legendFormat": "{{instance}} num_nodes", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "sub_libp2p", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:3225", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:3226", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 32 + }, + "hiddenSeries": false, + "id": 22, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.7", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "[[metric_namespace]]_sync_extra_justifications{instance=\"[[instance]]\",network=\"[[network]]\"}", + "interval": "", + "legendFormat": "{{instance}} active", + "refId": "A" + }, + { + "expr": "[[metric_namespace]]_sync_extra_justifications_failed{instance=\"[[instance]]\",network=\"[[network]]\"}", + "legendFormat": "{{instance}} failed", + "refId": "B" + }, + { + "expr": "[[metric_namespace]]_sync_extra_justifications_importing{instance=\"[[instance]]\",network=\"[[network]]\"}", + "legendFormat": "{{instance}} importing", + "refId": "C" + }, + { + "expr": "[[metric_namespace]]_sync_extra_justifications_pending{instance=\"[[instance]]\",network=\"[[network]]\"}", + "legendFormat": "{{instance}} pending", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Sync justifications", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:2339", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:2340", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 32 + }, + "hiddenSeries": false, + "id": 92, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.7", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate([[metric_namespace]]_finality_grandpa_communication_gossip_validator_messages{instance=\"[[instance]]\",network=\"[[network]]\"}[5m])", + "interval": "", + "legendFormat": "{{message}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "GRANDPA Validator Messages", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:312", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:313", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 4, + "x": 0, + "y": 38 + }, + "id": 94, + "options": { + "displayMode": "lcd", + "orientation": "horizontal", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "showUnfilled": true, + "text": {} + }, + "pluginVersion": "7.5.7", + "targets": [ + { + "exemplar": true, + "expr": "namedprocess_namegroup_thread_count{groupname=\"[[chain_process]]\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{threadname}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Thread count", + "type": "bargauge" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": null, + "fieldConfig": { + "defaults": { + "unit": "s" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 20, + "x": 4, + "y": 38 + }, + "hiddenSeries": false, + "id": 96, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.7", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "irate(namedprocess_namegroup_thread_cpu_seconds_total{groupname=\"[[chain_process]]\"} [5m])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{threadname}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Thread CPU time", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:3476", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:3477", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "5s", + "schemaVersion": 27, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "selected": true, + "text": "substrate", + "value": "substrate" + }, + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "Chain Metrics", + "multi": false, + "name": "metric_namespace", + "options": [ + { + "selected": true, + "text": "substrate", + "value": "substrate" + }, + { + "selected": false, + "text": "polkadot", + "value": "polkadot" + } + ], + "query": "substrate, polkadot", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + }, + { + "allValue": null, + "current": { + "selected": false, + "text": "None", + "value": "" + }, + "datasource": "Prometheus", + "definition": "label_values(network)", + "description": "Default value: None", + "error": null, + "hide": 2, + "includeAll": false, + "label": "Chain Network", + "multi": false, + "name": "network", + "options": [], + "query": { + "query": "label_values(network)", + "refId": "Prometheus-network-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "duniter-rpc:9615", + "value": "duniter-rpc:9615" + }, + "datasource": "Prometheus", + "definition": "label_values(instance)", + "description": "Chain instance server host - default value: localhost:9615", + "error": null, + "hide": 0, + "includeAll": false, + "label": "Chain Instance Host", + "multi": false, + "name": "instance", + "options": [], + "query": { + "query": "label_values(instance)", + "refId": "Prometheus-instance-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": false, + "text": "Prometheus", + "value": "Prometheus" + }, + "description": null, + "error": null, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "allValue": null, + "current": { + "selected": false, + "text": "None", + "value": "" + }, + "datasource": "Prometheus", + "definition": "label_values(node_uname_info, job)", + "description": "Node exporter server job - default value: node_exporter", + "error": null, + "hide": 2, + "includeAll": false, + "label": "Server Job", + "multi": false, + "name": "job", + "options": [], + "query": { + "query": "label_values(node_uname_info, job)", + "refId": "Prometheus-job-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": false, + "text": "None", + "value": "" + }, + "datasource": "Prometheus", + "definition": "label_values(node_uname_info{job=\"$job\"}, instance)", + "description": "Node exporter server host - default value: localhost:9100", + "error": null, + "hide": 2, + "includeAll": false, + "label": "Server Host", + "multi": false, + "name": "node", + "options": [], + "query": { + "query": "label_values(node_uname_info{job=\"$job\"}, instance)", + "refId": "Prometheus-node-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": false, + "text": "[a-z]+|nvme[0-9]+n[0-9]+", + "value": "[a-z]+|nvme[0-9]+n[0-9]+" + }, + "description": null, + "error": null, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "diskdevices", + "options": [ + { + "selected": true, + "text": "[a-z]+|nvme[0-9]+n[0-9]+", + "value": "[a-z]+|nvme[0-9]+n[0-9]+" + } + ], + "query": "[a-z]+|nvme[0-9]+n[0-9]+", + "skipUrlSync": false, + "type": "custom" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "duniter", + "value": "duniter" + }, + "description": "Name of the process of your node", + "error": null, + "hide": 0, + "includeAll": false, + "label": "Chain Process Name", + "multi": false, + "name": "chain_process", + "options": [ + { + "selected": false, + "text": "duniter", + "value": "duniter" + } + ], + "query": "duniter", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Polkadot Essentials 2021", + "uid": "bLdSubstrates", + "version": 1 +} diff --git a/docker/compose/validator-prod/process-exporter/config.yml b/docker/compose/validator-prod/process-exporter/config.yml new file mode 100644 index 0000000000000000000000000000000000000000..b68afb5462020cbdaacdb7eb993267218b2c3b88 --- /dev/null +++ b/docker/compose/validator-prod/process-exporter/config.yml @@ -0,0 +1,4 @@ +process_names: + - name: "{{.Comm}}" + cmdline: + - '.+' diff --git a/docker/compose/validator-prod/prometheus/alert.yml b/docker/compose/validator-prod/prometheus/alert.yml new file mode 100644 index 0000000000000000000000000000000000000000..851e696eaf051966d93d2daae9d18318b1989bdf --- /dev/null +++ b/docker/compose/validator-prod/prometheus/alert.yml @@ -0,0 +1,20 @@ +groups: + - name: alert_rules + rules: + - alert: InstanceDown + expr: up == 0 + for: 5m + labels: + severity: critical + annotations: + summary: "Instance $labels.instance down" + description: "[{{ $labels.instance }}] of job [{{ $labels.job }}] has been down for more than 1 minute." + + - alert: HostHighCpuLoad + expr: 100 - (avg by(instance)(rate(node_cpu_seconds_total{mode="idle"}[2m])) * 100) > 80 + for: 0m + labels: + severity: warning + annotations: + summary: Host high CPU load (instance bLd Kusama) + description: "CPU load is > 80%\n VALUE = {{ $value }}\n LABELS: {{ $labels }}" diff --git a/docker/compose/validator-prod/prometheus/prometheus.yml b/docker/compose/validator-prod/prometheus/prometheus.yml new file mode 100644 index 0000000000000000000000000000000000000000..dcd40f77bf6249d0c279cfb48690673e0b776e3c --- /dev/null +++ b/docker/compose/validator-prod/prometheus/prometheus.yml @@ -0,0 +1,26 @@ +rule_files: + - alert.yml + +alerting: + alertmanagers: + - static_configs: + - targets: + - localhost:9093 + +scrape_configs: + - job_name: "prometheus" + scrape_interval: 5s + static_configs: + - targets: ["prometheus:9090"] + - job_name: "duniter-validator" + scrape_interval: 5s + static_configs: + - targets: ["duniter-validator:9615"] + - job_name: "node_exporter" + scrape_interval: 5s + static_configs: + - targets: ["host.docker.internal:9100"] + - job_name: "process-exporter" + scrape_interval: 5s + static_configs: + - targets: ["process-exporter:9256"]