背景
今年写了一个数据中心的项目,其中有相当一部分的数据查询,用的是ES来做的,涉及到dsl的查询语句,从最开始的简单查询,到后面的复杂的查询,逐步掌握了ES的常用写法,现在总结一下。 文章内的称呼,没有按照ES的官方称呼,例如sql那边的表叫type,sql那边的行叫documents,sql那边的列或者字段叫fields。为了方便起见,统一按照sql的叫法。
查询语句解释说明
先看一个查询语句
示例四:
{
"size": 0,
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"terms": {
"order_status": [
2,
5,
9
]
}
},
{
"nested": {
"path": "order_detail_list",
"query": {
"terms": {
"order_detail_list.goods_id": [
136377796454,
136377796455,
136377796456
]
}
}
}
},
{
"range": {
"order_time": {
"gte": "2022-08-01 00:00:00",
"lte": "2022-08-30 23:59:59"
}
}
}
]
}
},
{
"bool": {
"must_not": [
{
"match_phrase": {
"user_remark": {
"query": "换货单"
}
}
}
]
}
}
]
}
},
"aggs": {
"total_order_count": {
"cardinality": {
"field": "union_pay_order_id",
"precision_threshold": 50000000
}
},
"order_detail": {
"nested": {
"path": "order_detail_list"
},
"aggs": {
"sum_all": {
"filter": {
"terms": {
"order_detail_list.goods_id": [
136377796454,
136377796455,
136377796456
]
}
},
"aggs": {
"total_money": {
"sum": {
"script": {
"lang": "painless",
"inline": "doc['order_detail_list.count'].value * doc['order_detail_list.price'].value"
}
}
},
"total_cost": {
"sum": {
"script": {
"lang": "painless",
"inline": "doc['order_detail_list.count'].value * doc['order_detail_list.purchase_dynavg_price'].value"
}
}
},
"total_count": {
"sum": {
"field": "order_detail_list.count"
}
}
}
}
}
},
"pay_type": {
"terms": {
"field": "pay_type"
},
"aggs": {
"order_deatil": {
"nested": {
"path": "order_detail_list"
},
"aggs": {
"group_by_paytype": {
"filter": {
"terms": {
"order_detail_list.goods_id": [
136377796454,
136377796455,
136377796456
]
}
},
"aggs": {
"discounts": {
"sum": {
"script": {
"lang": "painless",
"inline": "doc['order_detail_list.count'].value * doc['order_detail_list.discount_money'].value"
}
}
}
}
}
}
}
}
}
}
}
对应的查询结果:
{
"took": 16,
"timed_out": false,
"_shards": {
"total": 3,
"successful": 3,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 20,
"max_score": 0.0,
"hits": []
},
"aggregations": {
"total_order_count": {
"value": 20
},
"pay_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 12,
"doc_count": 17,
"order_deatil": {
"doc_count": 25,
"group_by_paytype": {
"doc_count": 25,
"discounts": {
"value": 0.0
}
}
}
},
{
"key": 5,
"doc_count": 3,
"order_deatil": {
"doc_count": 6,
"group_by_paytype": {
"doc_count": 6,
"discounts": {
"value": 0.0
}
}
}
}
]
},
"order_detail": {
"doc_count": 31,
"sum_all": {
"doc_count": 31,
"total_cost": {
"value": 41254.979919433594
},
"total_count": {
"value": 56.0
},
"total_money": {
"value": 5510.499999988824
}
}
}
}
}
抛开前面的条件不看,只看aggs部分的聚合语句 首先是对 进行去重计数:union_pay_order_id,照例加上“precision_threshold”,为了精准度,这部分前面说过。 再然后是进入嵌套的数据内层: 又加一层数据聚合过滤: 再后面就是对内层的字段进行运算并聚合,之前也记录过: 后面又有一个,跟前面的order_detail同级的另一个聚合,按照外层字段pay_type进行分组,然后再进行聚合结果: 这个语句实现了,对指定的商品ID,查询他们总的购买用户数,然后又计算了他们的购买金额,成本,以及件数。 然后又按照支付方式,统计了商品的优惠金额。
下面是另一个聚合的语句:
"aggregations": {
"discount_per_type": {
"terms": {
"field": "cnee_city.keyword"
},
"aggregations": {
"total_user_count": {
"cardinality": {
"field": "user_id",
"precision_threshold": 50000000
}
},
"total_order_money": {
"sum": {
"field": "order_money"
}
},
"total_order_cost": {
"sum": {
"field": "purchase_dynavg_money"
}
}
}
}
}
这里是按照cnee_city聚合的,由于cnee_city这里面,都是汉字,由于ES倒排索引的机制,在使用这个字段聚合的时候,需要在后面加上“.keyword”,当然还有其他的方法,这个是最简单的。
结尾
这一部分就先写到这里,这只是我对于ES的自己的总结,难免会有不完整以及错误的地方,欢迎指正。
|