可直接拿来用的kafka+prometheus+grafana监控告警配置

Posted NetWhite

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了可直接拿来用的kafka+prometheus+grafana监控告警配置相关的知识,希望对你有一定的参考价值。

kafka配置jmx_exporter

点击:https://github.com/prometheus/jmx_exporter,选择下面的jar包下载:

将下载好的这个agent jar包上传到kafka的broker节点所在服务器上,每个broker都需要,比如上传到如下路径:

/opt/agent/jmx_prometheus_javaagent-0.16.1.jar

修改kafka启动脚本: bin/kafka-run-class.sh,增加java agent配置如下:

# Attach the Prometheus jmx_exporter Java agent: it serves metrics over HTTP on port 9095
# using the rules in /opt/agent/kafka_broker.yml.
JMX_EXPORTER_OPTS="-javaagent:/opt/agent/jmx_prometheus_javaagent-0.16.1.jar=9095:/opt/agent/kafka_broker.yml"
# Append the agent flag to whatever JMX options are already set.
# NOTE(review): kafka-run-class.sh is presumably also used by the Kafka CLI tools; if so, each
# of them would load the agent and try to bind port 9095 while the broker holds it — confirm,
# and consider applying this only when starting the broker.
KAFKA_JMX_OPTS="$KAFKA_JMX_OPTS $JMX_EXPORTER_OPTS"

这两行代码可以放在脚本的最后几行,即最终启动Java进程的那条命令之前(原文此处为截图,已缺失),否则新增的参数不会生效:

这里指定了9095作为端口,jmx_exporter用到的kafka_broker.yml 配置如下:

---
# jmx_exporter (Prometheus JMX Java agent) configuration for a Kafka broker.
#
# Rules are applied in order and processing stops at the first matching rule,
# so specific patterns must stay above the generic ones they overlap with.
#
# Every pattern is single-quoted so that YAML passes backslashes through
# unchanged and the regex engine sees the character classes \w and \d.
# A doubled backslash would instead require a literal backslash in the MBean
# name, and the rule would never match.

# Delay (seconds) after agent start before metrics are served.
startDelaySeconds: 20
# Emit metric names and label names in lower case.
lowercaseOutputName: true
lowercaseOutputLabelNames: true
# MBeans that are never queried.
blacklistObjectNames:
  - "kafka.consumer:type=*,id=*"
  - "kafka.consumer:type=*,client-id=*"
  - "kafka.consumer:type=*,client-id=*,node-id=*"
  - "kafka.producer:type=*,id=*"
  - "kafka.producer:type=*,client-id=*"
  - "kafka.producer:type=*,client-id=*,node-id=*"
  - "kafka.*:type=kafka-metrics-count,*"
  # This will ignore the admin client metrics from Kafka Brokers and will blacklist certain metrics
  # that do not make sense for ingestion.
  # "kafka.admin.client:type=*, node-id=*, client-id=*"
  # "kafka.admin.client:type=*, client-id=*"
  # "kafka.admin.client:type=*, id=*"
  - "kafka.admin.client:*"
  - "kafka.server:type=*,cipher=*,protocol=*,listener=*,networkProcessor=*"
  - "kafka.server:type=*"
rules:
  # This is by far the biggest contributor to the number of sheer metrics being produced.
  # Always keep it on the top for the case of probability when so many metrics will hit the first condition and exit.
  # "kafka.cluster:type=*, name=*, topic=*, partition=*"
  # "kafka.log:type=*,name=*, topic=*, partition=*"
  - pattern: 'kafka.(\w+)<type=(.+), name=(.+), topic=(.+), partition=(.+)><>Value'
    name: kafka_$1_$2_$3
    type: GAUGE
    labels:
      topic: "$4"
      partition: "$5"
  # "kafka.server:type=*,name=*, client-id=*, topic=*, partition=*"
  - pattern: 'kafka.server<type=(.+), name=(.+), clientId=(.+), topic=(.+), partition=(.*)><>Value'
    name: kafka_server_$1_$2
    type: GAUGE
    labels:
      clientId: "$3"
      topic: "$4"
      partition: "$5"
  - pattern: 'kafka.server<type=(.+), name=(.+), clientId=(.+), brokerHost=(.+), brokerPort=(.+)><>Value'
    name: kafka_server_$1_$2
    type: GAUGE
    labels:
      clientId: "$3"
      broker: "$4:$5"
  # "kafka.network:type=*, name=*, request=*, error=*"
  # "kafka.network:type=*, name=*, request=*, version=*"
  - pattern: 'kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+), (.+)=(.+)><>(Count|Value)'
    name: kafka_$1_$2_$3
    labels:
      "$4": "$5"
      "$6": "$7"
  # Percentile attributes (e.g. 99thPercentile) become a quantile label ("0.99").
  - pattern: 'kafka.(\w+)<type=(.+), name=(.+), (.+)=(.*), (.+)=(.+)><>(\d+)thPercentile'
    name: kafka_$1_$2_$3
    type: GAUGE
    labels:
      "$4": "$5"
      "$6": "$7"
      quantile: "0.$8"
  # "kafka.rest:type=*, topic=*, partition=*, client-id=*"
  # "kafka.rest:type=*, cipher=*, protocol=*, client-id=*"
  - pattern: 'kafka.(\w+)<type=(.+), (.+)=(.+), (.+)=(.+), (.+)=(.+)><>Value'
    name: kafka_$1_$2
    labels:
      "$3": "$4"
      "$5": "$6"
      "$7": "$8"
  # Count and Value
  # "kafka.server:type=*, name=*, topic=*"
  # "kafka.server:type=*, name=*, clientId=*"
  # "kafka.server:type=*, name=*, delayedOperation=*"
  # "kafka.server:type=*, name=*, fetcherType=*"
  # "kafka.network:type=*, name=*, networkProcessor=*"
  # "kafka.network:type=*, name=*, processor=*"
  # "kafka.network:type=*, name=*, request=*"
  # "kafka.network:type=*, name=*, listener=*"
  # "kafka.log:type=*, name=*, logDirectory=*"
  # "kafka.log:type=*, name=*, op=*"
  # "kafka.rest:type=*, node-id=*, client-id=*"
  - pattern: 'kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+)><>(Count|Value)'
    name: kafka_$1_$2_$3
    labels:
      "$4": "$5"
  # "kafka.consumer:type=*, topic=*, client-id=*"
  # "kafka.producer:type=*, topic=*, client-id=*"
  # "kafka.rest:type=*, topic=*, client-id=*"
  # "kafka.server:type=*, broker-id=*, fetcher-id=*"
  # "kafka.server:type=*, listener=*, networkProcessor=*"
  - pattern: 'kafka.(\w+)<type=(.+), (.+)=(.+), (.+)=(.+)><>(Count|Value)'
    name: kafka_$1_$2
    labels:
      "$3": "$4"
      "$5": "$6"
  # "kafka.network:type=*, name=*"
  # "kafka.server:type=*, name=*"
  # "kafka.controller:type=*, name=*"
  # "kafka.databalancer:type=*, name=*"
  # "kafka.log:type=*, name=*"
  # "kafka.utils:type=*, name=*"
  - pattern: 'kafka.(\w+)<type=(.+), name=(.+)><>(Count|Value)'
    name: kafka_$1_$2_$3
  # "kafka.producer:type=*, client-id=*"
  # "kafka.producer:type=*, id=*"
  # "kafka.rest:type=*, client-id=*"
  # "kafka.rest:type=*, http-status-code=*"
  # "kafka.server:type=*, BrokerId=*"
  # "kafka.server:type=*, listener=*"
  # "kafka.server:type=*, id=*"
  - pattern: 'kafka.(\w+)<type=(.+), (.+)=(.+)><>Value'
    name: kafka_$1_$2
    labels:
      "$3": "$4"

  # Request handler idle ratio is read from the OneMinuteRate attribute,
  # which none of the Count/Value rules above match.
  - pattern: 'kafka.server<type=KafkaRequestHandlerPool, name=RequestHandlerAvgIdlePercent><>OneMinuteRate'
    name: kafka_server_kafkarequesthandlerpool_requesthandleravgidlepercent_total
    type: GAUGE
  # "kafka.server:type=*, listener=*, networkProcessor=*, clientSoftwareName=*, clientSoftwareVersion=*"
  - pattern: 'kafka.server<type=socket-server-metrics, clientSoftwareName=(.+), clientSoftwareVersion=(.+), listener=(.+), networkProcessor=(.+)><>connections'
    name: kafka_server_socketservermetrics_connections
    type: GAUGE
    labels:
      client_software_name: "$1"
      client_software_version: "$2"
      listener: "$3"
      network_processor: "$4"
  # The trailing ':' is part of the regex, so this scalar must stay quoted.
  - pattern: 'kafka.server<type=socket-server-metrics, listener=(.+), networkProcessor=(.+)><>(.+):'
    name: kafka_server_socketservermetrics_$3
    type: GAUGE
    labels:
      listener: "$1"
      network_processor: "$2"
  # "kafka.coordinator.group:type=*, name=*"
  # "kafka.coordinator.transaction:type=*, name=*"
  - pattern: 'kafka.coordinator.(\w+)<type=(.+), name=(.+)><>(Count|Value)'
    name: kafka_coordinator_$1_$2_$3
  # Percentile
  - pattern: 'kafka.(\w+)<type=(.+), name=(.+), (.+)=(.*)><>(\d+)thPercentile'
    name: kafka_$1_$2_$3
    type: GAUGE
    labels:
      "$4": "$5"
      quantile: "0.$6"
  - pattern: 'kafka.(\w+)<type=(.+), name=(.+)><>(\d+)thPercentile'
    name: kafka_$1_$2_$3
    type: GAUGE
    labels:
      quantile: "0.$4"

将kafka每个broker都这样配置,重启kafka。

Prometheus配置

修改prometheus的配置prometheus.yml,增加如下配置:

  # Scrape the jmx_exporter agent (port 9095) on every broker.
  # Each target gets the static label env="test".
  - job_name: "kafka"
    metrics_path: "/metrics"
    static_configs:
      - targets:
          - "kafka1:9095"
          - "kafka2:9095"
          - "kafka3:9095"
        labels:
          env: "test"

p.s. 注意job_name不要修改,值就是"kafka",要不我下面的grafana不能直接用,还需要每个面板依次修改。另外,面板中的查询还会按env标签过滤(env="$env"),所以labels里的env标签也必须保留,其值可按实际环境填写。

Grafana配置

下面的Grafana面板我已经配置好,可以直接拿来用,之后可以根据需要增加或删除相关面板:

{
  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "datasource": "-- Grafana --",
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "type": "dashboard"
      }
    ]
  },
  "description": "Kafka resource usage and throughput",
  "editable": true,
  "gnetId": 721,
  "graphTooltip": 0,
  "id": 4,
  "iteration": 1628943241052,
  "links": [],
  "panels": [
    {
      "collapsed": false,
      "datasource": null,
      "gridPos": {
        "h": 1,
        "w": 24,
        "x": 0,
        "y": 0
      },
      "id": 42,
      "panels": [],
      "title": "集群健康检查",
      "type": "row"
    },
    {
      "cacheTimeout": null,
      "datasource": "Prometheus",
      "description": "Number of active controllers in the cluster.",
      "fieldConfig": {
        "defaults": {
          "custom": {},
          "mappings": [
            {
              "id": 0,
              "op": "=",
              "text": "N/A",
              "type": 1,
              "value": "null"
            }
          ],
          "nullValueMode": "connected",
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "#299c46",
                "value": null
              },
              {
                "color": "#e5ac0e",
                "value": 2
              },
              {
                "color": "#bf1b00"
              }
            ]
          },
          "unit": "none"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 4,
        "w": 4,
        "x": 0,
        "y": 1
      },
      "id": 12,
      "interval": null,
      "links": [],
      "maxDataPoints": 100,
      "options": {
        "colorMode": "value",
        "fieldOptions": {
          "calcs": [
            "lastNotNull"
          ]
        },
        "graphMode": "none",
        "justifyMode": "auto",
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "last"
          ],
          "fields": "",
          "values": false
        },
        "textMode": "auto"
      },
      "pluginVersion": "7.3.1",
      "targets": [
        {
          "expr": "sum(kafka_controller_kafkacontroller_activecontrollercount{job=\"kafka\",env=\"$env\",instance=~\"$broker_id\"})",
          "format": "time_series",
          "intervalFactor": 1,
          "refId": "A"
        }
      ],
      "timeFrom": null,
      "timeShift": null,
      "title": "激活状态控制器数量",
      "type": "stat"
    },
    {
      "cacheTimeout": null,
      "datasource": "Prometheus",
      "description": "Number of Brokers Online",
      "fieldConfig": {
        "defaults": {
          "custom": {},
          "mappings": [
            {
              "id": 0,
              "op": "=",
              "text": "N/A",
              "type": 1,
              "value": "null"
            }
          ],
          "nullValueMode": "connected",
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "#d44a3a",
                "value": null
              },
              {
                "color": "rgba(237, 129, 40, 0.89)",
                "value": 0
              },
              {
                "color": "semi-dark-green",
                "value": 2
              }
            ]
          },
          "unit": "none"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 4,
        "w": 4,
        "x": 4,
        "y": 1
      },
      "id": 14,
      "interval": null,
      "links": [],
      "maxDataPoints": 100,
      "options": {
        "colorMode": "value",
        "fieldOptions": {
          "calcs": [
            "lastNotNull"
          ]
        },
        "graphMode": "none",
        "justifyMode": "auto",
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "last"
          ],
          "fields": "",
          "values": false
        },
        "textMode": "auto"
      },
      "pluginVersion": "7.3.1",
      "repeat": null,
      "repeatDirection": "h",
      "targets": [
        {
          "expr": "count(kafka_server_replicamanager_leadercount{job=\"kafka\",env=\"$env\",instance=~\"$broker_id\"})",
          "format": "time_series",
          "intervalFactor": 1,
          "legendFormat": "",
          "refId": "A"
        }
      ],
      "title": "在线broker数量",
      "type": "stat"
    },
    {
      "cacheTimeout": null,
      "datasource": "Prometheus",
      "description": "Unclean leader election rate",
      "fieldConfig": {
        "defaults": {
          "custom": {},
          "mappings": [
            {
              "id": 0,
              "op": "=",
              "text": "N/A",
              "type": 1,
              "value": "null"
            }
          ],
          "nullValueMode": "connected",
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "#299c46",
                "value": null
              },
              {
                "color": "rgba(237, 129, 40, 0.89)",
                "value": 1
              },
              {
                "color": "#d44a3a"
              }
            ]
          },
          "unit": "none"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 4,
        "w": 4,
        "x": 8,
        "y": 1
      },
      "id": 16,
      "interval": null,
      "links": [],
      "maxDataPoints": 100,
      "options": {
        "colorMode": "value",
        "fieldOptions": {
          "calcs": [
            "lastNotNull"
          ]
        },
        "graphMode": "none",
        "justifyMode": "auto",
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "last"
          ],
          "fields": "",
          "values": false
        },
        "textMode": "auto"
      },
      "pluginVersion": "7.3.1",
      "targets": [
        {
          "expr": "sum(kafka_controller_controllerstats_uncleanleaderelectionspersec{job=\"kafka\",env=\"$env\",instance=~\"$broker_id\"})",
          "format": "time_series",
          "intervalFactor": 1,
          "refId": "A"
        }
      ],
      "title": "Unclean Leader选举比率",
      "type": "stat"
    },
    {
      "cacheTimeout": null,
      "datasource": "Prometheus",
      "description": "",
      "fieldConfig": {
        "defaults": {
          "custom": {},
          "mappings": [
            {
              "id": 0,
              "op": "=",
              "text": "N/A",
              "type": 1,
              "value": "null"
            }
          ],
          "nullValueMode": "connected",
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "#299c46",
                "value": null
              },
              {
                "color": "rgba(237, 129, 40, 0.89)",
                "value": 2
              },
              {
                "color": "#d44a3a"
              }
            ]
          },
          "unit": "none"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 4,
        "w": 4,
        "x": 12,
        "y": 1
      },
      "id": 33,
      "interval": null,
      "links": [],
      "maxDataPoints": 100,
      "options": {
        "colorMode": "value",
        "fieldOptions": {
          "calcs": [
            "lastNotNull"
          ]
        },
        "graphMode": "none",
        "justifyMode": "auto",
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "last"
          ],
          "fields": "",
          "values": false
        },
        "textMode": "auto"
      },
      "pluginVersion": "7.3.1",
      "targets": [
        {
          "expr": "sum(kafka_controller_kafkacontroller_preferredreplicaimbalancecount{job=\"kafka\",env=\"$env\",instance=~\"$broker_id\"})",
          "format": "time_series",
          "intervalFactor": 1,
          "refId": "A"
        }
      ],
      "title": "未平衡到首选副本的数量",
      "type": "stat"
    },
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "Prometheus",
      "editable": true,
      "error": false,
      "fieldConfig": {
        "defaults": {
          "custom": {},
          "links": []
        },
        "overrides": []
      },
      "fill": 1,
      "fillGradient": 0,
      "grid": {},
      "gridPos": {
        "h": 8,
        "w": 8,
        "x": 16,
        "y": 1
      },
      "hiddenSeries": false,
      "id": 84,
      "isNew": true,
      "legend": {
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "show": true,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 2,
      "links": [],
      "nullPointMode": "connected",
      "options": {
        "alertThreshold": true
      },
      "percentage": false,
      "pluginVersion": "7.3.1",
      "pointradius": 5,
      "points": false,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "sum(rate(kafka_server_brokertopicmetrics_bytesinpersec{job=\"kafka\",env=\"$env\",instance=~\"$broker_id\",topic!=\"\"}[5m]))",
          "format": "time_series",
          "intervalFactor": 2,
          "legendFormat": "Bytes in",
          "metric": "kafka_server_brokertopicmetrics_bytesinpersec",
          "refId": "A",
          "step": 4
        },
        {
          "expr": "sum(rate(kafka_server_brokertopicmetrics_bytesoutpersec{job=\"kafka\",env=\"$env\",instance=~\"$broker_id\",topic!=\"\"}[5m]))",
          "format": "time_series",
          "hide": false,
          "interval": "",
          "intervalFactor": 2,
          "legendFormat": "Bytes out",
          "metric": "kafka_server_brokertopicmetrics_bytesoutpersec",
          "refId": "B",
          "step": 4
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "Broker网络吞吐量",
      "tooltip": {
        "msResolution": false,
        "shared": true,
        "sort": 2,
        "value_type": "cumulative"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "Bps",
          "label": "Bytes/s",
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        },
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    },
    {
      "cacheTimeout": null,
      "datasource": "Prometheus",
      "description": "Partitions that are online",
      "fieldConfig": {
        "defaults": {
          "custom": {},
          "mappings": [
            {
              "id": 0,
              "op": "=",
              "text": "N/A",
              "type": 1,
              "value": "null"
            }
          ],
          "nullValueMode": "connected",
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "#d44a3a",
                "value": null
              },
              {
                "color": "rgba(237, 129, 40, 0.89)",
                "value": 0
              },
              {
                "color": "#299c46",
                "value": 0
              }
            ]
          },
          "unit": "none"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 4,
        "w": 4,
        "x": 0,
        "y": 5
      },
      "id": 18,
      "interval": null,
      "links": [],
      "maxDataPoints": 100,
      "options": {
        "colorMode": "value",
        "fieldOptions": {
          "calcs": [
            "lastNotNull"
          ]
        },
        "graphMode": "area",
        "justifyMode": "auto",
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "last"
          ],
          "fields": "",
          "values": false
        },
        "textMode": "auto"
      },
      "pluginVersion": "7.3.1",
      "targets": [
        {
          "expr": "sum(kafka_server_replicamanager_partitioncount{job=\"kafka\",env=\"$env\",instance=~\"$broker_id\"})",
          "format": "time_series",
          "interval": "",
          "intervalFactor": 1,
          "legendFormat": "",
          "refId": "A"
        }
      ],
      "title": "在线分区数",
      "type": "stat"
    },
    {
      "cacheTimeout": null,
      "datasource": "Prometheus",
      "description": "Number of partitions that dont have an active leader and are hence not writable or readable.",
      "fieldConfig": {
        "defaults": {
          "custom": {},
          "mappings": [
            {
              "id": 0,
              "op": "=",
              "text": "N/A",
              "type": 1,
              "value": "null"
            }
          ],
          "nullValueMode": "connected",
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "semi-dark-green",
                "value": null
              },
              {
                "color": "#bf1b00",
                "value": 1
              }
            ]
          },
          "unit": "none"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 4,
        "w": 4,
        "x": 4,
        "y": 5
      },
      "id": 22,
      "interval": null,
      "links": [],
      "maxDataPoints": 100,
      "options": {
        "colorMode": "value",
        "fieldOptions": {
          "calcs": [
            "lastNotNull"
          ]
        },
        "graphMode": "area",
        "justifyMode": "auto",
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "last"
          ],
          "fields": "",
          "values": false
        },
        "textMode": "auto"
      },
      "pluginVersion": "7.3.1",
      "targets": [
        {
          "expr": "sum(kafka_controller_kafkacontroller_offlinepartitionscount{job=\"kafka\",env=\"$env\",instance=~\"$broker_id\"})",
          "format": "time_series",
          "interval": "",
          "intervalFactor": 1,
          "legendFormat": "",
          "refId": "A"
        }
      ],
      "timeFrom": null,
      "timeShift": null,
      "title": "离线分区数量",
      "type": "stat"
    },
    {
      "cacheTimeout": null,
      "datasource": "Prometheus",
      "description": "Number of under-replicated partitions (| ISR | < | all replicas |).",
      "fieldConfig": {
        "defaults": {
          "custom": {},
          "mappings": [
            {
              "id": 0,
              "op": "=",
              "text": "N/A",
              "type": 1,
              "value": "null"
            }
          ],
          "nullValueMode": "connected",
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "semi-dark-green",
                "value": null
              },
              {
                "color": "rgba(237, 129, 40, 0.89)",
                "value": 1
              },
              {
                "color": "#bf1b00",
                "value": 5
              }
            ]
          },
          "unit": "none"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 4,
        "w": 4,
        "x": 8,
        "y": 5
      },
      "id": 20,
      "interval": null,
      "links": [],
      "maxDataPoints": 100,
      "options": {
        "colorMode": "value",
        "fieldOptions": {
          "calcs": [
            "lastNotNull"
          ]
        },
        "graphMode": "area",
        "justifyMode": "auto",
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "last"
          ],
          "fields": "",
          "values": false
        },
        "textMode": "auto"
      },
      "pluginVersion": "7.3.1",
      "targets": [
        {
          "expr": "sum(kafka_server_replicamanager_underreplicatedpartitions{job=\"kafka\",env=\"$env\",instance=~\"$broker_id\"})",
          "format": "time_series",
          "hide": false,
          "intervalFactor": 2,
          "refId": "A"
        }
      ],
      "title": "没有保持同步的分区数",
      "type": "stat"
    },
    {
      "cacheTimeout": null,
      "datasource": "Prometheus",
      "description": "Number of partitions under min insync replicas.",
      "fieldConfig": {
        "defaults": {
          "custom": {},
          "mappings": [
            {
              "id": 0,
              "op": "=",
              "text": "N/A",
              "type": 1,
              "value": "null"
            }
          ],
          "nullValueMode": "connected",
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "semi-dark-green",
                "value": null
              },
              {
                "color": "#bf1b00",
                "value": 1
              }
            ]
          },
          "unit": "none"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 4,
        "w": 4,
        "x": 12,
        "y": 5
      },
      "id": 32,
      "interval": null,
      "links": [],
      "maxDataPoints": 100,
      "options": {
        "colorMode": "value",
        "fieldOptions": {
          "calcs": [
            "lastNotNull"
          ]
        },
        "graphMode": "area",
        "justifyMode": "auto",
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "last"
          ],
          "fields": "",
          "values": false
        },
        "textMode": "auto"
      },
      "pluginVersion": "7.3.1",
      "targets": [
        {
          "expr": "sum(kafka_cluster_partition_underminisr{job=\"kafka\",env=\"$env\",instance=~\"$broker_id\"})",
          "format": "time_series",
          "hide": false,
          "interval": "",
          "intervalFactor": 2,
          "legendFormat": "",
          "refId": "A"
        }
      ],
      "title": "isr小于最少同步副本的分区数",
      "type": "stat"
    },
    {
      "collapsed": false,
      "datasource": null,
      "gridPos": {
        "h": 1,
        "w": 24,
        "x": 0,
        "y": 9
      },
      "id": 40,
      "panels": [],
      "title": "系统状态",
      "type": "row"
    },
    {
      "aliasColors": {
        "localhost:7071": "#629E51"
      },
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "Prometheus",
      "editable": true,
      "error": false,
      "fieldConfig": {
        "defaults": {
          "custom": {},
          "links": []
        },
        "overrides": []
      },
      "fill": 1,
      "fillGradient": 0,
      "grid": {},
      "gridPos": {
        "h": 7,
        "w": 8,
        "x": 0,
        "y": 10
      },
      "hiddenSeries": false,
      "id": 27,
      "isNew": true,
      "legend": {
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "show": true,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 2,
      "links": [],
      "nullPointMode": "connected",
      "options": {
        "alertThreshold": true
      },
      "percentage": false,
      "pluginVersion": "7.3.1",
      "pointradius": 5,
      "points": false,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "irate(process_cpu_seconds_total{job=\"kafka\",env=\"$env\",instance=~\"$broker_id\"}[5m])*100",
          "format": "time_series",
          "intervalFactor": 2,
          "legendFormat": "{{instance}}",
          "metric": "process_cpu_secondspersec",
          "refId": "A",
          "step": 4
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "CPU 使用率",
      "tooltip": {
        "msResolution": false,
        "shared": true,
        "sort": 2,
        "value_type": "cumulative"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "percent",
          "label": "Cores",
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        },
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    },
    {
      "aliasColors": {
        "localhost:7071": "#BA43A9"
      },
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "Prometheus",
      "editable": true,
      "error": false,
      "fieldConfig": {
        "defaults": {
          "custom": {},
          "links": []
        },
        "overrides": []
      },
      "fill": 1,
      "fillGradient": 0,
      "grid": {},
      "gridPos": {
        "h": 7,
        "w": 8,
        "x": 8,
        "y": 10
      },
      "hiddenSeries": false,
      "id": 2,
      "isNew": true,
      "legend": {
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "show": true,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 2,
      "links": [],
      "nullPointMode": "connected",
      "options": {
        "alertThreshold": true
      },
      "percentage": false,
      "pluginVersion": "7.3.1",
      "pointradius": 5,
      "points": false,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "sum without(area)(jvm_memory_bytes_used{job=\"kafka\",env=\"$env\",instance=~\"$broker_id\"})",
          "intervalFactor": 2,
          "legendFormat": "{{instance}}",
          "metric": "jvm_memory_bytes_used",
          "refId": "A",
          "step": 4
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "JVM 内存使用情况",
      "tooltip": {
        "msResolution": false,
        "shared": true,
        "sort": 2,
        "value_type": "cumulative"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "bytes",
          "label": "Memory",
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        },
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    },
    {
      "aliasColors": {
        "localhost:7071": "#890F02"
      },
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "Prometheus",
      "editable": true,
      "error": false,
      "fieldConfig": {
        "defaults": {
          "custom": {},
          "links": []
        },
        "overrides": []
      },
      "fill": 1,
      "fillGradient": 0,
      "grid": {},
      "gridPos": {
        "h": 7,
        "w": 8,
        "x": 16,
        "y": 10
      },
      "hiddenSeries": false,
      "id": 3,
      "isNew": true,
      "legend": {
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "show": true,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 2,
      "links": [],
      "nullPointMode": "connected",
      "options": {
        "alertThreshold": true
      },
      "percentage": false,
      "pluginVersion": "7.3.1",
      "pointradius": 5,
      "points": false,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "sum without(gc)(rate(jvm_gc_collection_seconds_sum{job=\"kafka\",env=\"$env\",instance=~\"$broker_id\"}[5m]))",
          "intervalFactor": 2,
          "legendFormat": "{{instance}}",
          "metric": "jvm_gc_collection_seconds_sum",
          "refId": "A",
          "step": 4
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "GC耗时",
      "tooltip": {
        "msResolution": false,
        "shared": true,
        "sort": 2,
        "value_type": "cumulative"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "percentunit",
          "label": "% time in GC",
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        },
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    },
    {
      "aliasColors": {
        "localhost:7071": "#629E51"
      },
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "Prometheus",
      "editable": true,
      "error": false,
      "fieldConfig": {
        "defaults": {
          "custom": {},
          "links": []
        },
        "overrides": []
      },
      "fill": 1,
      "fillGradient": 0,
      "grid": {},
      "gridPos": {
        "h": 7,
        "w": 12,
        "x": 0,
        "y": 17
      },
      "hiddenSeries": false,
      "id": 128,
      "isNew": true,
      "legend": {
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "show": true,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 2,
      "links": [],
      "nullPointMode": "connected",
      "options": {
        "alertThreshold": true
      },
      "percentage": false,
      "pluginVersion": "7.3.1",
      "pointradius": 5,
      "points": false,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "jvm_gc_collection_seconds_count{job=\"kafka\",env=\"$env\",instance=~\"$broker_id\", gc=~'.*Young.*'} - jvm_gc_collection_seconds_count{job=\"kafka\",env=\"$env\",instance=~\"$broker_id\", gc=~'.*Young.*'} offset 1m",
          "format": "time_series",
          "interval": "",
          "intervalFactor": 2,
          "legendFormat": "{{instance}}",
          "metric": "jvm_gc_collection_seconds_count",
          "refId": "A",
          "step": 4
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "最近1分钟YGC次数",
      "tooltip": {
        "msResolution": false,
        "shared": true,
        "sort": 2,
        "value_type": "cumulative"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "none",
          "label": "GC count",
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        },
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    },
    {
      "aliasColors": {
        "localhost:7071": "#629E51"
      },
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "Prometheus",
      "editable": true,
      "error": false,
      "fieldConfig": {
        "defaults": {
          "custom": {},
          "links": []
        },
        "overrides": []
      },
      "fill": 1,
      "fillGradient": 0,
      "grid": {},
      "gridPos": {
        "h": 7,
        "w": 12,
        "x": 12,
        "y": 17
      },
      "hiddenSeries": false,
      "id": 129,
      "isNew": true,
      "legend": {
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "show": true,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 2,
      "links": [],
      "nullPointMode": "connected",
      "options": {
        "alertThreshold": true
      },
      "percentage": false,
      "pluginVersion": "7.3.1",
      "pointradius": 5,
      "points": false,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "jvm_gc_collection_seconds_count{job=\"kafka\",env=\"$env\",instance=~\"$broker_id\", gc=~'.*Old.*'} - jvm_gc_collection_seconds_count{job=\"kafka\",env=\"$env\",instance=~\"$broker_id\", gc=~'.*Old.*'} offset 1m",
          "format": "time_series",
          "interval": "",
          "intervalFactor": 2,
          "legendFormat": "{{instance}}",
          "metric": "jvm_gc_collection_seconds_count",
          "refId": "A",
          "step": 4
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "最近1分钟FGC次数",
      "tooltip": {
        "msResolution": false,
        "shared": true,
        "sort": 2,
        "value_type": "cumulative"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "none",
          "label": "GC count",
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        },
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    },
    {
      "collapsed": false,
      "datasource": null,
      "gridPos": {
        "h": 1,
        "w": 24,
        "x": 0,
        "y": 24
      },
      "id": 29,
      "panels": [],
      "title": "消息的吞吐量",
      "type": "row"
    },
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "Prometheus",
      "editable": true,
      "error": false,
      "fieldConfig": {
        "defaults": {
          "custom": {},
          "links": []
        },
        "overrides": []
      },
      "fill": 1,
      "fillGradient": 0,
      "grid": {},
      "gridPos": {
        "h": 7,
        "w": 8,
        "x": 0,
        "y": 25
      },
      "hiddenSeries": false,
      "id": 10,
      "isNew": true,
      "legend": {
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "show": true,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 2,
      "links": [],
      "nullPointMode": "connected",
      "options": {
        "alertThreshold": true
      },
      "percentage": false,
      "pluginVersion": "7.3.1",
      "pointradius": 5,
      "points": false,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "sum without(topic)(rate(kafka_server_brokertopicmetrics_messagesinpersec{job=\"kafka\",env=\"$env\",instance=~\"$broker_id\",topic=~\"$topic\"}[5m]))",
          "format": "time_series",
          "interval": "",
          "intervalFactor": 2,
          "legendFormat": "{{instance}}",
          "metric": "kafka_server_brokertopicmetrics_messagesinpersec",
          "refId": "A",
          "step": 4
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "每个Broker的消息速率",
      "tooltip": {
        "msResolution": false,
        "shared": true,
        "sort": 2,
        "value_type": "cumulative"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "iops",
          "label": "Messages/s",
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        },
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    },
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "Prometheus",
      "editable": true,
      "error": false,
      "fieldConfig": {
        "defaults": {
          "custom": {},
          "links": []
        },
        "overrides": []
      },
      "fill": 1,
      "fillGradient": 0,
      "grid": {},
      "gridPos": {
        "h": 7,
        "w": 8,
        "x": 8,
        "y": 25
      },
      "hiddenSeries": false,
      "id": 7,
      "isNew": true,
      "legend": {
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "show": true,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 2,
      "links": [],
      "nullPointMode": "connected",
      "options": {
        "alertThreshold": true
      },
      "percentage": false,
      "pluginVersion": "7.3.1",
      "pointradius": 5,
      "points": false,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "sum without(topic)(rate(kafka_server_brokertopicmetrics_bytesinpersec{job=\"kafka\",env=\"$env\",instance=~\"$broker_id\",topic=~\"$topic\"}[5m]))",
          "format": "time_series",
          "interval": "",
          "intervalFactor": 2,
          "legendFormat": "{{instance}}",
          "metric": "kafka_server_brokertopicmetrics_bytesinpersec",
          "refId": "A",
          "step": 4
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "每个Broker每秒入站字节数",
      "tooltip": {
        "msResolution": false,
        "shared": true,
        "sort": 2,
        "value_type": "cumulative"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "Bps",
          "label": "Bytes/s",
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        },
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    },
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "Prometheus",
      "fieldConfig": {
        "defaults": {
          "custom": {},
          "links": []
        },
        "overrides": []
      },
      "fill": 1,
      "fillGradient": 0,
      "gridPos": {
        "h": 7,
        "w": 8,
        "x": 16,
        "y": 25
      },
      "hiddenSeries": false,
      "id": 9,
      "legend": {
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "show": true,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 1,
      "links": [],
      "nullPointMode": "null",
      "options": {
        "alertThreshold": true
      },
      "percentage": false,
      "pluginVersion": "7.3.1",
      "pointradius": 5,
      "points": false,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "sum without(topic)(rate(kafka_server_brokertopicmetrics_bytesoutpersec{job=\"kafka\",env=\"$env\",instance=~\"$broker_id\",topic=~\"$topic\"}[5m]))",
          "format": "time_series",
          "interval": "",
          "intervalFactor": 1,
          "legendFormat": "{{instance}}",
          "refId": "A"
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "每个Broker上每秒出站字节数",
      "tooltip": {
        "shared": true,
        "sort": 2,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "Bps",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": "0",
          "show": true
        },
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    },
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "Prometheus",
      "decimals": null,
      "editable": true,
      "error": false,
      "fieldConfig": {
        "defaults": {
          "custom": {},
          "links": []
        },
        "overrides": []
      },
      "fill": 1,
      "fillGradient": 0,
      "grid": {},
      "gridPos": {
        "h": 8,
        "w": 24,
        "x": 0,
        "y": 32
      },
      "hiddenSeries": false,
      "id": 4,
      "isNew": true,
      "legend": {
        "alignAsTable": true,
        "avg": true,
        "current": true,
        "max": true,
        "min": false,
        "rightSide": true,
        "show": true,
        "sort": "current",
        "sortDesc": true,
        "total": false,
        "values": true
      },
      "lines": true,
      "linewidth": 2,
      "links": [],
      "nullPointMode": "connected",
      "options": {
        "alertThreshold": true
      },
      "percentage": false,
      "pluginVersion": "7.3.1",
      "pointradius": 5,
      "points": false,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "sum without(instance)(rate(kafka_server_brokertopicmetrics_messagesinpersec{job=\"kafka\",env=\"$env\",instance=~\"$broker_id\",topic=~\"$topic\"}[5m]))",
          "interval": "",
          "intervalFactor": 2,
          "legendFormat": "{{topic}}",
          "metric": "kafka_server_brokertopicmetrics_messagesinpersec",
          "refId": "A",
          "step": 4
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "每个topic消息速率",
      "tooltip": {
        "msResolution": false,
        "shared": true,
        "sort": 2,
        "value_type": 

以上是关于可直接拿来用的kafka+prometheus+grafana监控告警配置的主要内容,如果未能解决你的问题,请参考以下文章

在 Rancher 上向 Prometheus 和 Grafana 公开 Kafka 指标

可以直接拿来用的15个jQuery代码片段

几个可以直接拿来用的jQuery代码片段

几个可以直接拿来用的jQuery代码片段

那些拿来就能直接用的界面搭建好福利

拿来直接用的数据分析论