4.elasticsearch聚合分析

Posted PacosonSWJTU

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了4.elasticsearch聚合分析相关的知识,希望对你有一定的参考价值。

【README】

1.本文介绍了elasticsearch聚合分析的开发方式;


【1】聚合分析介绍

1)聚合提供了从数据中分组和提取数据的能力。

  • 最简单的聚合方法大致类似于 SQL 中的 GROUP BY 和聚合函数;
  • 在es中,搜索结果返回 hits(命中结果),并且同时返回聚合结果;
  • 可以执行查询和多个聚合,并且在一次请求中得到各自的结果

2)聚合开发文档:

Aggregations | Elasticsearch Guide [7.2] | Elastic


 【1.1】agg-terms统计年龄分布及avg统计平均年龄

1)搜索address中包含 mill的所有人的年龄分布以及平均年龄;

  • terms聚合结果是数据分布,如年龄为28的有3人,年龄为29的有5人;
  • terms 类似于 sql中的 group by 关键字

场景:查询address包含mill的文档,并基于此统计age分布(统计每个年龄值的个数),年龄均值,余额均值;(这就是一次查询请求,可以执行多种聚合的例子)

注意:match 查询匹配的是 address 字段分词后包含词项 mill 的文档,而不是 address 整体等于 mill 的文档。

// 【sql】近似写法,仅作类比:ES 请求中的 ageAvgAgg、balanceAvgAgg 是对全部命中文档求均值,并不按 age 分组
Select count(1) , avg(age), avg(balance)
From bank
Where address like '%mill%'
Group by age

// [ES AGG] elasticsearch聚合
POST localhost:9200/bank/_search
{
    "query": {
        "match": {
            "address": "mill"
        }
    },
    "aggs": { // 聚合关键字 aggs
        "ageAgg": {
            "terms": {
                "field": "age",
                "size": 10 // 仅统计出10种不同可能的结果
            }
        },
        "ageAvgAgg": {
            "avg": { "field": "age" } // 求age均值
        },
        "balanceAvgAgg": {
            "avg": { "field": "balance" } // 求 balance余额 均值
        }
    },
    "size": 0
}


// 聚合结果

{
    "took": 20,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 4,
            "relation": "eq"
        },
        "max_score": null,
        "hits": []
    },
    "aggregations": {
        "ageAgg": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [ // 年龄分组结果
                {
                    "key": 38,
                    "doc_count": 2
                },
                {
                    "key": 28,
                    "doc_count": 1
                },
                {
                    "key": 32,
                    "doc_count": 1
                }
            ]
        },
        "ageAvgAgg": { // 年龄均值
            "value": 34.0
        },
        "balanceAvgAgg": { // 余额均值
            "value": 25208.0
        }
    }
}


【1.2】嵌套聚合统计各个年龄值的人的平均薪资

1)场景:按照年龄聚合,并基于此统计每个年龄值对应人群的平均薪资(即 balance 字段的均值);
例如:统计年龄为31的61个人的平均薪资;

POST localhost:9200/bank/_search
{
    "query": {
        "match_all": {}
    },
    "aggs": {
        "ageAgg": {
            "terms": {
                "field": "age",
                "size": 3
            },
            "aggs": { // 嵌套聚合,聚合中的聚合
                "balanceAvgAggForEveryAge": {
                    "avg": { "field": "balance" }
                }
            }
        }
    },
    "size": 0
}


// 聚合结果

{
    "took": 52,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 1000,
            "relation": "eq"
        },
        "max_score": null,
        "hits": []
    },
    "aggregations": {
        "ageAgg": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 820,
            "buckets": [
                {
                    "key": 31,
                    "doc_count": 61,
                    "balanceAvgAggForEveryAge": {
                        "value": 28312.918032786885
                    }
                },
                {
                    "key": 39,
                    "doc_count": 60,
                    "balanceAvgAggForEveryAge": {
                        "value": 25269.583333333332
                    }
                },
                {
                    "key": 26,
                    "doc_count": 59,
                    "balanceAvgAggForEveryAge": {
                        "value": 23194.813559322032
                    }
                }
            ]
        }
    }
}

【1.3】terms分组嵌套

场景:查询所有年龄分布,并基于此统计每个年龄值的总体平均薪资,以及该年龄中性别为M和F的人各自的平均薪资

POST localhost:9200/bank/_search
{
    "query": {
        "match_all": {}
    },
    "aggs": {
        "ageAgg": {
            "terms": {
                "field": "age",
                "size": 3
            },
            "aggs": {
                "wholeBalanceAvgAgg": {
                    "avg": { "field": "balance" }
                },
                "genderAgg": {
                    "terms": {
                        "field": "gender.keyword"
                    },
                    "aggs": {
                        "balanceAvgAgg": {
                            "avg": { "field": "balance" }
                        }
                    }
                }
            }
        }
    },
    "size": 0
}


// 聚合结果

{
    "took": 12,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 1000,
            "relation": "eq"
        },
        "max_score": null,
        "hits": []
    },
    "aggregations": {
        "ageAgg": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 820,
            "buckets": [
                {
                    "key": 31,
                    "doc_count": 61,
                    "wholeBalanceAvgAgg": {
                        "value": 28312.918032786885
                    },
                    "genderAgg": {
                        "doc_count_error_upper_bound": 0,
                        "sum_other_doc_count": 0,
                        "buckets": [
                            {
                                "key": "M",
                                "doc_count": 35,
                                "balanceAvgAgg": {
                                    "value": 29565.628571428573
                                }
                            },
                            {
                                "key": "F",
                                "doc_count": 26,
                                "balanceAvgAgg": {
                                    "value": 26626.576923076922
                                }
                            }
                        ]
                    }
                },
                {
                    "key": 39,
                    "doc_count": 60,
                    "wholeBalanceAvgAgg": {
                        "value": 25269.583333333332
                    },
                    "genderAgg": {
                        "doc_count_error_upper_bound": 0,
                        "sum_other_doc_count": 0,
                        "buckets": [
                            {
                                "key": "F",
                                "doc_count": 38,
                                "balanceAvgAgg": {
                                    "value": 26348.684210526317
                                }
                            },
                            {
                                "key": "M",
                                "doc_count": 22,
                                "balanceAvgAgg": {
                                    "value": 23405.68181818182
                                }
                            }
                        ]
                    }
                },
                {
                    "key": 26,
                    "doc_count": 59,
                    "wholeBalanceAvgAgg": {
                        "value": 23194.813559322032
                    },
                    "genderAgg": {
                        "doc_count_error_upper_bound": 0,
                        "sum_other_doc_count": 0,
                        "buckets": [
                            {
                                "key": "M",
                                "doc_count": 32,
                                "balanceAvgAgg": {
                                    "value": 25094.78125
                                }
                            },
                            {
                                "key": "F",
                                "doc_count": 27,
                                "balanceAvgAgg": {
                                    "value": 20943.0
                                }
                            }
                        ]
                    }
                }
            ]
        }
    }
}

以上是关于4.elasticsearch聚合分析的主要内容,如果未能解决你的问题,请参考以下文章

sql聚合函数的使用

oracle--聚合函数和case when结合使用

Django Query 每天将两行中的值聚合为单个结果

通过 BigQuery 上的更改事件聚合时间序列

[学习ES系列]-4.ElasticSearch基础交互-基础查询与高级查询

新技术与注会