Elasticsearch 从复合聚合中排除键
Posted
技术标签:
【中文标题】Elasticsearch 从复合聚合中排除键【英文标题】:Elasticsearch exclude key from composite aggregation 【发布时间】:2021-11-06 00:43:09 【问题描述】:我需要在复合聚合中排除某些键。 下面是我索引中的一个文档示例:
"end_date": 1230314400000,
"parameter_codes": [28, 35, 30],
"platform_code": "41012",
"start_date": 1230314400000,
"station_id": 7833246
我执行了一个搜索请求,它可以让我:按每个 platform_code/parameter_codes 组合获取结果,获取每个组合对应的 station_id,并对存储桶进行分页。
这是请求:
"size": 0,
"query":
"match_all":
"boost": 1.0
,
"_source": false,
"aggregations":
"compositeAgg":
"composite":
"size": 10,
"sources": [
"platform_code":
"terms":
"field": "platform_code",
"missing_bucket": false,
"order": "asc"
,
"parameter_codes":
"terms":
"field": "parameter_codes",
"missing_bucket": false,
"order": "asc"
]
,
"aggregations":
"aggstation_id":
"terms":
"field": "station_id",
"size": 2147483647,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order":
"_key": "asc"
,
"pipe":
"bucket_sort":
"sort": [
"_key":
"order": "asc"
],
"from": 0,
"size": 10,
"gap_policy": "SKIP"
这个请求给了我以下结果:
"took": 3,
"timed_out": false,
"_shards":
"total": 8,
"successful": 8,
"skipped": 0,
"failed": 0
,
"hits":
"total":
"value": 3,
"relation": "eq"
,
"max_score": null,
"hits": []
,
"aggregations":
"composite#compositeAgg":
"after_key":
"platform_code": "41012",
"parameter_codes": 60
,
"buckets": [
"key":
"platform_code": "41012",
"parameter_codes": 28
,
"doc_count": 1,
"lterms#aggstation_id":
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
"key": 7833246,
"doc_count": 1
]
,
"key":
"platform_code": "41012",
"parameter_codes": 30
,
"doc_count": 2,
"lterms#aggstation_id":
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
"key": 7833246,
"doc_count": 1
,
"key": 12787501,
"doc_count": 1
]
,
"key":
"platform_code": "41012",
"parameter_codes": 35
,
"doc_count": 2,
"lterms#aggstation_id":
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
"key": 7833246,
"doc_count": 1
,
"key": 12787501,
"doc_count": 1
]
]
这很好用,但我需要排除一个或多个 parameter_codes 的值。 例如,排除“35”之后,我只想得到以下键:
"platform_code": "41012",
"parameter_codes": 28
和
"platform_code": "41012",
"parameter_codes": 30
我尝试了很多选项,但都没有成功。 有人知道我该怎么做吗?
【问题讨论】:
【参考方案1】:可以在复合源中使用脚本查询来仅返回数组的特定值。
"size": 0,
"query":
"match_all":
"boost": 1
,
"_source": false,
"aggregations":
"compositeAgg":
"composite":
"size": 10,
"sources": [
"platform_code":
"terms":
"field": "platform_code.keyword",
"missing_bucket": false,
"order": "asc"
,
"parameter_codes":
"terms":
"script":
"source": """
def arr=[];
for (item in doc['parameter_codes'])
if(item !=35)
arr.add(item);
return arr"""
]
,
"aggregations":
"aggstation_id":
"terms":
"field": "station_id",
"size": 2147483647,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order":
"_key": "asc"
,
"pipe":
"bucket_sort":
"sort": [
"_key":
"order": "asc"
],
"from": 0,
"size": 10,
"gap_policy": "SKIP"
【讨论】:
谢谢你,这似乎工作得很好。我曾考虑过使用脚本字段,但我不知道如何/在哪里编写它。 @Guilhem.Legal 很高兴能帮上忙。如果这解决了您的问题,请将其标记为答案并点赞。【参考方案2】:您可以尝试在查询中排除“parameter_codes=35”的文档。
"query":
"bool":
"must_not": [
"term":
"parameter_codes":
"value": "35"
]
【讨论】:
我已经试过了。如果我添加它,所有 parameter_codes 中包含 '35' 的文档都会从结果中被排除,所以我无法检索到这些文档中其他的 platform_code/parameter_codes 组合。以上是关于Elasticsearch 从复合聚合中排除键的主要内容,如果未能解决你的问题,请参考以下文章