4.elasticsearch聚合分析
Posted PacosonSWJTU
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了4.elasticsearch聚合分析相关的知识,希望对你有一定的参考价值。
【README】
1.本文介绍了elasticsearch聚合分析的开发方式;
【1】聚合分析介绍
1)聚合提供了对数据进行分组并从中提取统计信息的能力。
- 最简单的聚合方法大致相当于 SQL 中的 GROUP BY 和 SQL 聚合函数;
- 在es中,搜索结果返回 hits(命中结果),并且同时返回聚合结果;
- 可以执行查询和多个聚合,并且在一次请求中得到各自的结果;
2)聚合开发文档:
Aggregations | Elasticsearch Guide [7.2] | Elastic
【1.1】agg-terms统计年龄分布及avg统计平均年龄
1)搜索address中包含 mill的所有人的年龄分布以及平均年龄;
- terms聚合结果是数据分布,如年龄为28的有3人,年龄为29的有5人;
- terms 类似于 sql中的 group by 关键字;
场景:查询address包含mill的文档,并基于此统计age分布(统计每个年龄值的个数),年龄均值,余额均值;(这就是一次查询请求,可以执行多种聚合的例子)
注意:match 查询只要求 address 字段中包含 mill 即符合条件,并不要求 address 的值等于 mill;
// 【sql】近似的 SQL 写法(仅作类比:下面 ES 请求中的两个 avg 聚合是对全部命中文档求均值,而不是按 age 分组后分别求均值)
Select count(1) , avg(age), avg(balance)
From bank
Where address like '%mill%'
Group by age
// [ES AGG] elasticsearch聚合
Post localhost:9200/bank/_search
{
  "query": {
    "match": {
      "address": "mill"
    }
  },
  "aggs": { // 聚合关键字 aggs
    "ageAgg": {
      "terms": {
        "field": "age",
        "size": 10 // 仅统计出10种不同可能的结果
      }
    },
    "ageAvgAgg": {
      "avg": { "field": "age" } // 求age均值
    },
    "balanceAvgAgg": {
      "avg": { "field": "balance" } // 求 balance余额 均值
    }
  },
  "size": 0
}
// 聚合结果
{
  "took": 20,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 4,
      "relation": "eq"
    },
    "max_score": null,
    "hits": []
  },
  "aggregations": {
    "ageAgg": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [ // 年龄分组结果
        {
          "key": 38,
          "doc_count": 2
        },
        {
          "key": 28,
          "doc_count": 1
        },
        {
          "key": 32,
          "doc_count": 1
        }
      ]
    },
    "ageAvgAgg": { // 年龄均值
      "value": 34.0
    },
    "balanceAvgAgg": { // 余额均值
      "value": 25208.0
    }
  }
}
【1.2】嵌套聚合统计各个年龄值的人的平均余额
1)场景:按照年龄聚合,并且基于此,统计每个年龄值对应人群的平均余额(balance);
如统计 年龄为31的61个人的平均余额;
Post localhost:9200/bank/_search
{
  "query": {
    "match_all": {}
  },
  "aggs": {
    "ageAgg": {
      "terms": {
        "field": "age",
        "size": 3
      },
      "aggs": { // 嵌套聚合,聚合中的聚合
        "balanceAvgAggForEveryAge": {
          "avg": { "field": "balance" }
        }
      }
    }
  },
  "size": 0
}
// 聚合结果
{
  "took": 52,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 1000,
      "relation": "eq"
    },
    "max_score": null,
    "hits": []
  },
  "aggregations": {
    "ageAgg": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 820,
      "buckets": [
        {
          "key": 31,
          "doc_count": 61,
          "balanceAvgAggForEveryAge": {
            "value": 28312.918032786885
          }
        },
        {
          "key": 39,
          "doc_count": 60,
          "balanceAvgAggForEveryAge": {
            "value": 25269.583333333332
          }
        },
        {
          "key": 26,
          "doc_count": 59,
          "balanceAvgAggForEveryAge": {
            "value": 23194.813559322032
          }
        }
      ]
    }
  }
}
【1.3】terms分组嵌套
场景:查询所有文档的年龄分布,并基于此统计每个年龄值下的总体平均余额,以及其中性别分别为 M 和 F 的人的平均余额(balance)
Post localhost:9200/bank/_search
{
  "query": {
    "match_all": {}
  },
  "aggs": {
    "ageAgg": {
      "terms": {
        "field": "age",
        "size": 3
      },
      "aggs": {
        "wholeBalanceAvgAgg": {
          "avg": { "field": "balance" }
        },
        "genderAgg": {
          "terms": {
            "field": "gender.keyword"
          },
          "aggs": {
            "balanceAvgAgg": {
              "avg": { "field": "balance" }
            }
          }
        }
      }
    }
  },
  "size": 0
}
// 聚合结果
{
  "took": 12,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 1000,
      "relation": "eq"
    },
    "max_score": null,
    "hits": []
  },
  "aggregations": {
    "ageAgg": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 820,
      "buckets": [
        {
          "key": 31,
          "doc_count": 61,
          "wholeBalanceAvgAgg": {
            "value": 28312.918032786885
          },
          "genderAgg": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "M",
                "doc_count": 35,
                "balanceAvgAgg": {
                  "value": 29565.628571428573
                }
              },
              {
                "key": "F",
                "doc_count": 26,
                "balanceAvgAgg": {
                  "value": 26626.576923076922
                }
              }
            ]
          }
        },
        {
          "key": 39,
          "doc_count": 60,
          "wholeBalanceAvgAgg": {
            "value": 25269.583333333332
          },
          "genderAgg": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "F",
                "doc_count": 38,
                "balanceAvgAgg": {
                  "value": 26348.684210526317
                }
              },
              {
                "key": "M",
                "doc_count": 22,
                "balanceAvgAgg": {
                  "value": 23405.68181818182
                }
              }
            ]
          }
        },
        {
          "key": 26,
          "doc_count": 59,
          "wholeBalanceAvgAgg": {
            "value": 23194.813559322032
          },
          "genderAgg": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "M",
                "doc_count": 32,
                "balanceAvgAgg": {
                  "value": 25094.78125
                }
              },
              {
                "key": "F",
                "doc_count": 27,
                "balanceAvgAgg": {
                  "value": 20943.0
                }
              }
            ]
          }
        }
      ]
    }
  }
}
以上是关于4.elasticsearch聚合分析的主要内容,如果未能解决你的问题,请参考以下文章