Elasticsearch指标监控说明

Posted 顧棟

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Elasticsearch指标监控说明相关的知识,希望对你有一定的参考价值。

Elasticsearch指标监控说明

指标维度

可以把指标分为6个维度。
(图片缺失:指标的6个维度示意图)
下面的表格总结了上一篇文章中提到的搜索性能、索引性能、内存性能、网络性能等指标所对应的ES API。其中有些性能数据是从多个维度描述的,比如搜索性能指标在节点维度和索引维度都有提供。

| Metric category | Availability | Description |
| --- | --- | --- |
| Search performance metrics | Node Stats API, Index Stats API | 查询性能指标 |
| Indexing performance metrics | Node Stats API, Index Stats API | 写入性能指标 |
| Memory and garbage collection | Node Stats API, Cluster Stats API | 内存与GC性能指标 |
| Network metrics | Node Stats API | 网络情况 |
| Cluster health and node availability | Cluster Health API | 集群健康和节点的可用性指标 |
| Resource saturation and errors | Node Stats API, Index Stats API, Cluster Stats API, Pending Tasks API | 资源状态和错误信息指标 |

指标分级

Elasticsearch集群监控状态指标分为三个级别:

集群级别

集群级别的监控主要是针对整个Elasticsearch集群来说,包括集群的健康状况、集群的状态等。

节点级别

节点级别的监控主要是针对每个Elasticsearch实例的监控,其中包括每个实例的查询索引指标和物理资源使用指标。

索引级别

索引级别的监控主要是针对每个索引来说,主要包括每个索引的性能指标。

指标分布如图
(图片缺失:指标分布图)

API使用示例

主要有五个HTTP REST API可用于采集Elasticsearch监控指标:

  • Cluster Health API
  • Cluster Stats API
  • Node Stats API
  • Index Stats API
  • Pending Tasks API

集群健康情况

GET /_cluster/health?human&pretty

结果示例

{
  "cluster_name": "es-platform-log",
  "status": "green",
  "timed_out": false,
  "number_of_nodes": 9,
  "number_of_data_nodes": 6,
  "active_primary_shards": 80,
  "active_shards": 166,
  "relocating_shards": 0,
  "initializing_shards": 0,
  "unassigned_shards": 0,
  "delayed_unassigned_shards": 0,
  "number_of_pending_tasks": 0,
  "number_of_in_flight_fetch": 0,
  "task_max_waiting_in_queue": "0s",
  "task_max_waiting_in_queue_millis": 0,
  "active_shards_percent": "100.0%",
  "active_shards_percent_as_number": 100
}

统计集群信息

GET /_cluster/stats?human&pretty

结果示例

{
  "_nodes": {
    "total": 9,
    "successful": 9,
    "failed": 0
  },
  "cluster_name": "es-platform-log",
  "cluster_uuid": "M0qBb884SWCYd8KjQcpFQg",
  "timestamp": 1625657783874,
  "status": "green",
  "indices": {
    "count": 34,
    "shards": {
      "total": 166,
      "primaries": 80,
      "replication": 1.075,
      "index": {
        "shards": {
          "min": 2,
          "max": 20,
          "avg": 4.882352941176471
        },
        "primaries": {
          "min": 1,
          "max": 5,
          "avg": 2.3529411764705883
        },
        "replication": {
          "min": 0,
          "max": 3,
          "avg": 1
        }
      }
    },
    "docs": {
      "count": 794178337,
      "deleted": 8595881
    },
    "store": {
      "size": "813.1gb",
      "size_in_bytes": 873069900311
    },
    "fielddata": {
      "memory_size": "1.7kb",
      "memory_size_in_bytes": 1840,
      "evictions": 0
    },
    "query_cache": {
      "memory_size": "1gb",
      "memory_size_in_bytes": 1099581304,
      "total_count": 3012012,
      "hit_count": 2854064,
      "miss_count": 157948,
      "cache_size": 3570,
      "cache_count": 5415,
      "evictions": 1845
    },
    "completion": {
      "size": "0b",
      "size_in_bytes": 0
    },
    "segments": {
      "count": 659,
      "memory": "898.8mb",
      "memory_in_bytes": 942513495,
      "terms_memory": "387.5mb",
      "terms_memory_in_bytes": 406388453,
      "stored_fields_memory": "483.7mb",
      "stored_fields_memory_in_bytes": 507295448,
      "term_vectors_memory": "0b",
      "term_vectors_memory_in_bytes": 0,
      "norms_memory": "18kb",
      "norms_memory_in_bytes": 18432,
      "points_memory": "22.9mb",
      "points_memory_in_bytes": 24036474,
      "doc_values_memory": "4.5mb",
      "doc_values_memory_in_bytes": 4774688,
      "index_writer_memory": "9.7mb",
      "index_writer_memory_in_bytes": 10228272,
      "version_map_memory": "260b",
      "version_map_memory_in_bytes": 260,
      "fixed_bit_set": "733.2kb",
      "fixed_bit_set_memory_in_bytes": 750848,
      "max_unsafe_auto_id_timestamp": 1625616004589,
      "file_sizes": {}
    }
  },
  "nodes": {
    "count": {
      "total": 9,
      "data": 6,
      "coordinating_only": 0,
      "master": 3,
      "ingest": 9
    },
    "versions": [
      "6.7.2"
    ],
    "os": {
      "available_processors": 360,
      "allocated_processors": 360,
      "names": [
        {
          "name": "Linux",
          "count": 9
        }
      ],
      "pretty_names": [
        {
          "pretty_name": "CentOS Linux 7 (Core)",
          "count": 9
        }
      ],
      "mem": {
        "total": "1.1tb",
        "total_in_bytes": 1211755806720,
        "free": "21.7gb",
        "free_in_bytes": 23325560832,
        "used": "1tb",
        "used_in_bytes": 1188430245888,
        "free_percent": 2,
        "used_percent": 98
      }
    },
    "process": {
      "cpu": {
        "percent": 0
      },
      "open_file_descriptors": {
        "min": 869,
        "max": 1000,
        "avg": 948
      }
    },
    "jvm": {
      "max_uptime": "297.7d",
      "max_uptime_in_millis": 25724732930,
      "versions": [
        {
          "version": "1.8.0_161",
          "vm_name": "Java HotSpot(TM) 64-Bit Server VM",
          "vm_version": "25.161-b12",
          "vm_vendor": "Oracle Corporation",
          "count": 9
        }
      ],
      "mem": {
        "heap_used": "135.7gb",
        "heap_used_in_bytes": 145777776144,
        "heap_max": "279gb",
        "heap_max_in_bytes": 299573968896
      },
      "threads": 2654
    },
    "fs": {
      "total": "43tb",
      "total_in_bytes": 47300324622336,
      "free": "42.6tb",
      "free_in_bytes": 46859340689408,
      "available": "42.1tb",
      "available_in_bytes": 46376411246592
    },
    "plugins": [
      {
        "name": "analysis-pinyin",
        "version": "6.7.2.4",
        "elasticsearch_version": "6.7.2",
        "java_version": "1.8",
        "description": "Pinyin Analysis for Elasticsearch",
        "classname": "org.elasticsearch.plugin.analysis.pinyin.AnalysisPinyinPlugin",
        "extended_plugins": [],
        "has_native_controller": false
      },
      {
        "name": "analysis-dynamic-synonym",
        "version": "6.7.2.4",
        "elasticsearch_version": "6.7.2",
        "java_version": "1.8",
        "description": "Analysis-plugin for synonym",
        "classname": "com.bellszhu.elasticsearch.plugin.DynamicSynonymPlugin",
        "extended_plugins": [],
        "has_native_controller": false
      },
      {
        "name": "repository-sdoss",
        "version": "6.7.2.4",
        "elasticsearch_version": "6.7.2",
        "java_version": "1.8",
        "description": "The Sdoss repository plugin adds support for SDFS or other sdoss server repositories.",
        "classname": "org.elasticsearch.repositories.sdoss.SdossPlugin",
        "extended_plugins": [],
        "has_native_controller": false
      },
      {
        "name": "analysis-ik",
        "version": "6.7.2.4",
        "elasticsearch_version": "6.7.2",
        "java_version": "1.8",
        "description": "IK Analyzer for Elasticsearch",
        "classname": "org.elasticsearch.plugin.analysis.ik.AnalysisIkPlugin",
        "extended_plugins": [],
        "has_native_controller": false
      }
    ],
    "network_types": {
      "transport_types": {
        "netty4": 9
      },
      "http_types": {
        "netty4": 9
      }
    }
  }
}

统计索引信息

GET /_stats

GET /index1,index2/_stats

可以在URL中增减参数,只统计部分指标,如 GET /_stats/merge,refresh。

| 参数值 | 说明 |
| --- | --- |
| docs | The number of docs / deleted docs (docs not yet merged out). Note, affected by refreshing the index. |
| store | The size of the index. |
| indexing | Indexing statistics, can be combined with a comma separated list of types to provide document type level stats. |
| get | Get statistics, including missing stats. |
| search | Search statistics including suggest statistics. You can include statistics for custom groups by adding an extra groups parameter (search operations can be associated with one or more groups). The groups parameter accepts a comma separated list of group names. Use _all to return statistics for all groups. |
| segments | Retrieve the memory use of the open segments. Optionally, setting the include_segment_file_sizes flag, report the aggregated disk usage of each one of the Lucene index files. |
| completion | Completion suggest statistics. |
| fielddata | Fielddata statistics. |
| flush | Flush statistics. |
| merge | Merge statistics. |
| request_cache | Shard request cache statistics. |
| refresh | Refresh statistics. |
| warmer | Warmer statistics. |
| translog | Translog statistics. |

结果示例

{
  "_shards": {
    "total": 4,
    "successful": 4,
    "failed": 0
  },
  "_all": {
    "primaries": {
      "docs": {
        "count": 791173931,
        "deleted": 8580767
      },
      "store": {
        "size_in_bytes": 437042050823
      },
      "indexing": {
        "index_total": 1005140339,
        "index_time_in_millis": 234948689,
        "index_current": 0,
        "index_failed": 0,
        "delete_total": 458968359,
        "delete_time_in_millis": 90953371,
        "delete_current": 0,
        "noop_update_total": 0,
        "is_throttled": false,
        "throttle_time_in_millis": 0
      },
      "get": {
        "total": 0,
        "time_in_millis": 0,
        "exists_total": 0,
        "exists_time_in_millis": 0,
        "missing_total": 0,
        "missing_time_in_millis": 0,
        "current": 0
      },
      "search": {
        "open_contexts": 0,
        "query_total": 527285,
        "query_time_in_millis": 38479084,
        "query_current": 0,
        "fetch_total": 307477,
        "fetch_time_in_millis": 7206781,
        "fetch_current": 0,
        "scroll_total": 51,
        "scroll_time_in_millis": 358349299,
        "scroll_current": 0,
        "suggest_total": 0,
        "suggest_time_in_millis": 0,
        "suggest_current": 0
      },
      "merges": {
        "current": 0,
        "current_docs": 0,
        "current_size_in_bytes": 0,
        "total": 2973208,
        "total_time_in_millis": 972619673,
        "total_docs": 14744942732,
        "total_size_in_bytes": 7712062295680,
        "total_stopped_time_in_millis": 0,
        "total_throttled_time_in_millis": 530615641,
        "total_auto_throttle_in_bytes": 10485760
      },
      "refresh": {
        "total": 26691087,
        "total_time_in_millis": 474628597,
        "listeners": 0
      },
      "flush": {
        "total": 3305,
        "periodic": 3301,
        "total_time_in_millis": 1497051
      },
      "warmer": {
        "current": 0,
        "total": 26662293,
        "total_time_in_millis": 493330
      },
      "query_cache": {
        "memory_size_in_bytes": 443995504,
        "total_count": 2037568,
        "hit_count": 1964092,
        "miss_count": 73476,
        "cache_size": 1436,
        "cache_count": 2735,
        "evictions": 1299
      },
      "fielddata": {
        "memory_size_in_bytes": 0,
        "evictions": 0
      },
      "completion": {
        "size_in_bytes": 0
      },
      "segments": {
        "count": 138,
        "memory_in_bytes": 467411612,
        "terms_memory_in_bytes": 202202784,
        "stored_fields_memory_in_bytes": 254003248,
        "term_vectors_memory_in_bytes": 0,
        "norms_memory_in_bytes": 8832,
        "points_memory_in_bytes": 11182860,
        "doc_values_memory_in_bytes": 13888,
        "index_writer_memory_in_bytes": 665540,
        "version_map_memory_in_bytes": 0,
        "fixed_bit_set_memory_in_bytes": 0,
        "max_unsafe_auto_id_timestamp": 1619778779940,
        "file_sizes": {}
      },
      "translog": {
        "operations": 639821,
        "size_in_bytes": 1156652496,
        "uncommitted_operations": 297497,
        "uncommitted_size_in_bytes": 619765088,
        "earliest_last_modified_age": 0
      },
      "request_cache": {
        "memory_size_in_bytes": 0,
        "evictions": 0,
        "hit_count": 39,
        "miss_count": 90
      },
      "recovery": {
        "current_as_source": 0,
        "current_as_target": 0,
        "throttle_time_in_millis": 1331100
      }
    },
    "total": {
      "docs": {
        "count": 1582347865,
        "deleted": 14594676
      },
      "store": {
        "size_in_bytes": 869264426131
      },
      "indexing": {
        "index_total": 2014175604,
        "index_time_in_millis": 479642467,
        "index_current": 0,
        "index_failed": 0,
        "delete_total": 754975511,
        "delete_time_in_millis": 169701198,
        "delete_current"
……(示例输出在此处被截断)

MetricBeat + Elasticsearch + Kibana 实现监控指标可视化

运维工程师监控工作之Elasticsearch关键指标采集方法

监控Elasticsearch

ElasticSearch 集群监控

如何监控 Elasticsearch 集群状态?

elasticsearch 性能监控基础