1. 初始化es
from elasticsearch import Elasticsearch
# Client shared by every example below; it targets a single node.
es_hosts = [{'host': '192.168.171.81', 'port': 9200}]
es = Elasticsearch(es_hosts, timeout=3600)
2. 创建index
# Mapping for the example index.
# "age" and "power" are mapped as integers rather than keywords: later
# examples run a range query, a max aggregation and a range aggregation on
# "age", which need a numeric field (keyword ranges compare lexicographically
# and numeric metric aggregations reject keyword fields). The sample
# documents send these values as numeric strings such as "16"; Elasticsearch
# coerces those to numbers by default.
request_body = {
    "mappings": {
        "properties": {
            "name": {"type": "keyword"},
            "age": {"type": "integer"},
            "sex": {"type": "keyword"},
            "address": {"type": "keyword"},
            "sect": {"type": "keyword"},
            "skill": {"type": "keyword"},
            "power": {"type": "integer"},
            "create_time": {"type": "keyword"},
            "modify_time": {"type": "keyword"},
        }
    }
}
# Create 'example_index' using the mapping held in request_body.
response = es.indices.create(index='example_index', body=request_body)
返回结果response :
{
"acknowledged": true,
"index": "example_index",
"shards_acknowledged": true
}
3. 添加数据
3.1 普通方式添加数据
# Index a single document; no explicit id is passed to es.index().
data = dict(
    name="赵敏",
    age="16",
    sex="f",
    address="大都",
    sect="朝廷",
    skill="无",
    power="40",
    create_time="2022-4-18 14:34:47",
    modify_time="2022-4-18 14:34:52",
)
response = es.index(index="example_index", body=data)
response 返回结果如下:
{
"_id": "k5NiO4ABj1R4dwhU4Go9",
"_index": "example_index",
"_primary_term": 1,
"_seq_no": 2,
"_shards": {
"failed": 0,
"successful": 1,
"total": 2
},
"_version": 1,
"result": "created"
}
3.2 使用bulk批量添加数据
借助elasticsearch helpers 工具进行添加,需要引入:from elasticsearch import helpers
# Bulk-insert two documents through the helpers module: every action names
# the target index and carries the document itself under "_source".
zhang_wuji = {
    "name": "张无忌",
    "age": "19",
    "sex": "m",
    "address": "光明顶",
    "sect": "明教",
    "skill": "九阳神功",
    "power": "99",
    "create_time": "2022-4-18 11:25:24",
    "modify_time": "2022-4-18 11:25:46",
}
zhou_zhiruo = {
    "name": "周芷若",
    "age": "17",
    "sex": "f",
    "address": "峨眉山",
    "sect": "峨眉派",
    "skill": "九阴真经",
    "power": "88",
    "create_time": "2022-4-18 11:27:40",
    "modify_time": "2022-4-18 11:27:48",
}
bulk_data = [
    {'_index': 'example_index', '_source': source}
    for source in (zhang_wuji, zhou_zhiruo)
]
response = helpers.bulk(es, bulk_data)
response 返回结果如下:
[
2,
[]
]
表明正常添加了两条数据。 此外,也可以通过下面这种bulk的方式来进行添加:
# Raw bulk payload: each {"index": ...} action line is immediately followed
# by the source document it applies to.
body = [
    {"index": {"_index": "example_index"}},
    {"name": "张三丰", "age": "90", "sex": "m", "address": "武当山", "sect": "武当派", "skill": "太极", "power": "95", "create_time": "2022-4-18 14:59:34", "modify_time": "2022-4-18 14:59:44"},
    {"index": {"_index": "example_index"}},
    {"name": "宋远桥", "age": "40", "sex": "m", "address": "武当山", "sect": "武当派", "skill": "太极", "power": "60", "create_time": "2022-4-18 15:02:08", "modify_time": "2022-4-18 15:02:15"},
]
# Pass the payload by keyword, like the search/update calls in these
# examples; newer elasticsearch-py releases reject positional API arguments.
response = es.bulk(body=body)
这种方式的response 的返回结果如下:
{
"errors": false,
"items": [
{
"index": {
"_id": "lJN9O4ABj1R4dwhUgGrI",
"_index": "example_index",
"_primary_term": 1,
"_seq_no": 3,
"_shards": {
"failed": 0,
"successful": 1,
"total": 2
},
"_version": 1,
"result": "created",
"status": 201
}
},
{
"index": {
"_id": "lZN9O4ABj1R4dwhUgGrI",
"_index": "example_index",
"_primary_term": 1,
"_seq_no": 4,
"_shards": {
"failed": 0,
"successful": 1,
"total": 2
},
"_version": 1,
"result": "created",
"status": 201
}
}
],
"took": 14
}
可以看出它将每条数据的插入信息都返回了。
4. 删除数据
4.1 普通方式删除数据
# Remove a single document, addressed by its _id.
doc_id = "k5NiO4ABj1R4dwhU4Go9"
response = es.delete(index="example_index", id=doc_id)
返回response如下:
{
"_id": "k5NiO4ABj1R4dwhU4Go9",
"_index": "example_index",
"_primary_term": 1,
"_seq_no": 9,
"_shards": {
"failed": 0,
"successful": 1,
"total": 2
},
"_version": 2,
"result": "deleted"
}
4.2 使用bulk批量删除数据
# Bulk delete: one {"delete": ...} action per document; delete actions are
# not followed by a source line.
body = [
    {"delete": {"_index": "example_index", "_id": "lJN9O4ABj1R4dwhUgGrI"}},
    {"delete": {"_index": "example_index", "_id": "lZN9O4ABj1R4dwhUgGrI"}},
]
# Pass the payload by keyword, like the search/update calls in these examples.
response = es.bulk(body=body)
返回结果response如下:
{
"errors": false,
"items": [
{
"delete": {
"_id": "lJN9O4ABj1R4dwhUgGrI",
"_index": "example_index",
"_primary_term": 1,
"_seq_no": 5,
"_shards": {
"failed": 0,
"successful": 1,
"total": 2
},
"_version": 2,
"result": "deleted",
"status": 200
}
},
{
"delete": {
"_id": "lZN9O4ABj1R4dwhUgGrI",
"_index": "example_index",
"_primary_term": 1,
"_seq_no": 6,
"_shards": {
"failed": 0,
"successful": 1,
"total": 2
},
"_version": 2,
"result": "deleted",
"status": 200
}
}
],
"took": 16
}
4.3 按条件删除数据
# Delete every document whose "name" is exactly "赵敏".
name_is_zhao_min = {"term": {"name": {"value": "赵敏"}}}
query = {"query": {"bool": {"must": [name_is_zhao_min]}}}
response = es.delete_by_query(index='example_index', body=query)
返回结果response如下:
{
"batches": 1,
"deleted": 1,
"failures": [],
"noops": 0,
"requests_per_second": -1.0,
"retries": {
"bulk": 0,
"search": 0
},
"throttled_millis": 0,
"throttled_until_millis": 0,
"timed_out": false,
"took": 34,
"total": 1,
"version_conflicts": 0
}
5. 更新数据
5.1 普通方式更新
原始文档数据:
{
"name" : "赵敏",
"age" : "16",
"sex" : "f",
"address" : "大都",
"sect" : "朝廷",
"skill" : "无",
"power" : "40",
"create_time" : "2022-4-18 14:34:47",
"modify_time" : "2022-4-18 14:34:52"
}
需求:我们需要将age 改为17 ,代码如下:
# Partial update: the fields to change are wrapped in a "doc" object.
changed_fields = {"age": "17"}
data = {"doc": changed_fields}
response = es.update(index='example_index', id='mZOyO4ABj1R4dwhUb2r6', body=data)
需要注意的是更新的data 中需要包含doc ,doc 里面才是更新的数据。 返回response信息如下:
{
"_id": "mZOyO4ABj1R4dwhUb2r6",
"_index": "example_index",
"_primary_term": 1,
"_seq_no": 13,
"_shards": {
"failed": 0,
"successful": 1,
"total": 2
},
"_version": 2,
"result": "updated"
}
5.2 使用bulk批量修改
# Bulk update: each {"update": ...} action line is followed by a {"doc": ...}
# line holding the fields to change for that document.
body = [
    {"update": {"_id": "mZOyO4ABj1R4dwhUb2r6", "_index": "example_index"}},
    {"doc": {"age": "18"}},
    {"update": {"_id": "kZMpO4ABj1R4dwhUfWpA", "_index": "example_index"}},
    {"doc": {"age": "20", "skill": "九阳神功, 乾坤大挪移", "父亲": "张翠山"}},
]
# Pass the payload by keyword, like the search/update calls in these examples.
response = es.bulk(body=body)
返回response 信息如下:
{
"errors": false,
"items": [
{
"update": {
"_id": "mZOyO4ABj1R4dwhUb2r6",
"_index": "example_index",
"_primary_term": 1,
"_seq_no": 14,
"_shards": {
"failed": 0,
"successful": 1,
"total": 2
},
"_version": 3,
"result": "updated",
"status": 200
}
},
{
"update": {
"_id": "kZMpO4ABj1R4dwhUfWpA",
"_index": "example_index",
"_primary_term": 1,
"_seq_no": 15,
"_shards": {
"failed": 0,
"successful": 1,
"total": 2
},
"_version": 2,
"result": "updated",
"status": 200
}
}
],
"took": 171
}
6. 查询
6.1 等值查询
等值查询,即筛选出一个字段等于特定值的所有记录。 SQL:
select * from example_index where name = '赵敏';
python:
# Exact-match (term) lookup on "name", placed under "must" so the hit
# carries a relevance score.
name_term = {"term": {"name": {"value": "赵敏"}}}
query = {"query": {"bool": {"must": [name_term]}}}
response = es.search(index='example_index', size=1, body=query)
返回response结果如下:
{
"_shards": {
"failed": 0,
"skipped": 0,
"successful": 1,
"total": 1
},
"hits": {
"hits": [
{
"_id": "mZOyO4ABj1R4dwhUb2r6",
"_index": "example_index",
"_score": 1.1631508,
"_source": {
"address": "大都",
"age": "18",
"create_time": "2022-4-18 14:34:47",
"modify_time": "2022-4-18 14:34:52",
"name": "赵敏",
"power": "40",
"sect": "朝廷",
"sex": "f",
"skill": "无"
}
}
],
"max_score": 1.1631508,
"total": {
"relation": "eq",
"value": 1
}
},
"timed_out": false,
"took": 3
}
我们可以看到返回结果中包含_score ,ES会根据结果匹配程度进行评分。打分是会耗费性能的,如果确认自己的查询不需要评分,就设置查询语句关闭评分。因此我们常常使用filter 进行查询:
# The same exact-match lookup, but placed under "filter" instead of "must".
name_filter = {"term": {"name": "赵敏"}}
query = {"query": {"bool": {"filter": [name_filter]}}}
response = es.search(index='example_index', size=1, body=query)
通过filter 查询的结果response如下:
{
"_shards": {
"failed": 0,
"skipped": 0,
"successful": 1,
"total": 1
},
"hits": {
"hits": [
{
"_id": "mZOyO4ABj1R4dwhUb2r6",
"_index": "example_index",
"_score": 0.0,
"_source": {
"address": "大都",
"age": "18",
"create_time": "2022-4-18 14:34:47",
"modify_time": "2022-4-18 14:34:52",
"name": "赵敏",
"power": "40",
"sect": "朝廷",
"sex": "f",
"skill": "无"
}
}
],
"max_score": 0.0,
"total": {
"relation": "eq",
"value": 1
}
},
"timed_out": false,
"took": 3
}
可以看到返回结果中的_score 为0.0,即没有再计算评分,这种方式可以节省性能。
6.2 多值查询
多值查询类似MySQL里的IN查询,即某个字段等于多个取值中的任意一个,例如: SQL:
select * from example_index where sect in('明教','武当派');
Python:
# "terms" takes a list of accepted values for the "sect" field.
wanted_sects = ["武当派", "明教"]
sect_filter = {"terms": {"sect": wanted_sects}}
query = {"query": {"bool": {"filter": [sect_filter]}}}
response = es.search(index='example_index', size=100, body=query)
返回结果如下:
{
"_shards": {
"failed": 0,
"skipped": 0,
"successful": 1,
"total": 1
},
"hits": {
"hits": [
{
"_id": "lpOYO4ABj1R4dwhUv2qJ",
"_index": "example_index",
"_score": 0.0,
"_source": {
"address": "武当山",
"age": "90",
"create_time": "2022-4-18 14:59:34",
"modify_time": "2022-4-18 14:59:44",
"name": "张三丰",
"power": "95",
"sect": "武当派",
"sex": "m",
"skill": "太极"
}
},
{
"_id": "l5OYO4ABj1R4dwhUv2qJ",
"_index": "example_index",
"_score": 0.0,
"_source": {
"address": "武当山",
"age": "40",
"create_time": "2022-4-18 15:02:08",
"modify_time": "2022-4-18 15:02:15",
"name": "宋远桥",
"power": "60",
"sect": "武当派",
"sex": "m",
"skill": "太极"
}
},
{
"_id": "kZMpO4ABj1R4dwhUfWpA",
"_index": "example_index",
"_score": 0.0,
"_source": {
"address": "光明顶",
"age": "20",
"create_time": "2022-4-18 11:25:24",
"modify_time": "2022-4-18 11:25:46",
"name": "张无忌",
"power": "99",
"sect": "明教",
"sex": "m",
"skill": "九阳神功, 乾坤大挪移",
"父亲": "张翠山"
}
}
],
"max_score": 0.0,
"total": {
"relation": "eq",
"value": 3
}
},
"timed_out": false,
"took": 3
}
使用filter 进行查询,得到的结果中_score 均为0.0(不计算评分),可以提升查询性能。
6.3 范围查询
范围查询,即查询某字段在特定区间的记录。 SQL:
select * from example_index where age between 10 and 30;
python
# Range query on "age" with inclusive bounds (gte / lte).
# NOTE(review): the bounds are numbers, so this presumably expects "age" to
# be mapped as a numeric field -- confirm against the index mapping.
age_bounds = {"gte": 10, "lte": 30}
query = {"query": {"range": {"age": age_bounds}}}
response = es.search(index='example_index', size=100, body=query)
返回结果如下:
{
"_shards": {
"failed": 0,
"skipped": 0,
"successful": 1,
"total": 1
},
"hits": {
"hits": [
{
"_id": "kpMpO4ABj1R4dwhUfWpA",
"_index": "example_index",
"_score": 1.0,
"_source": {
"address": "峨眉山",
"age": "17",
"create_time": "2022-4-18 11:27:40",
"modify_time": "2022-4-18 11:27:48",
"name": "周芷若",
"power": "88",
"sect": "峨眉派",
"sex": "f",
"skill": "九阴真经"
}
},
{
"_id": "mZOyO4ABj1R4dwhUb2r6",
"_index": "example_index",
"_score": 1.0,
"_source": {
"address": "大都",
"age": "18",
"create_time": "2022-4-18 14:34:47",
"modify_time": "2022-4-18 14:34:52",
"name": "赵敏",
"power": "40",
"sect": "朝廷",
"sex": "f",
"skill": "无"
}
},
{
"_id": "kZMpO4ABj1R4dwhUfWpA",
"_index": "example_index",
"_score": 1.0,
"_source": {
"address": "光明顶",
"age": "20",
"create_time": "2022-4-18 11:25:24",
"modify_time": "2022-4-18 11:25:46",
"name": "张无忌",
"power": "99",
"sect": "明教",
"sex": "m",
"skill": "九阳神功, 乾坤大挪移",
"父亲": "张翠山"
}
}
],
"max_score": 1.0,
"total": {
"relation": "eq",
"value": 3
}
},
"timed_out": false,
"took": 3
}
6.4 前缀查询
前缀查询类似于SQL中的模糊查询。 SQL:
select * from example_index where sect like '武当%';
Python
# Filter for documents whose "sect" value starts with "武当".
sect_prefix = {"prefix": {"sect": "武当"}}
query = {"query": {"bool": {"filter": [sect_prefix]}}}
response = es.search(index='example_index', size=100, body=query)
返回结果信息如下:
{
"_shards": {
"failed": 0,
"skipped": 0,
"successful": 1,
"total": 1
},
"hits": {
"hits": [
{
"_id": "lpOYO4ABj1R4dwhUv2qJ",
"_index": "example_index",
"_score": 0.0,
"_source": {
"address": "武当山",
"age": "90",
"create_time": "2022-4-18 14:59:34",
"modify_time": "2022-4-18 14:59:44",
"name": "张三丰",
"power": "95",
"sect": "武当派",
"sex": "m",
"skill": "太极"
}
},
{
"_id": "l5OYO4ABj1R4dwhUv2qJ",
"_index": "example_index",
"_score": 0.0,
"_source": {
"address": "武当山",
"age": "40",
"create_time": "2022-4-18 15:02:08",
"modify_time": "2022-4-18 15:02:15",
"name": "宋远桥",
"power": "60",
"sect": "武当派",
"sex": "m",
"skill": "太极"
}
}
],
"max_score": 0.0,
"total": {
"relation": "eq",
"value": 2
}
},
"timed_out": false,
"took": 5
}
6.5 通配符查询-wildcard
通配符查询,与前缀查询类似,都属于模糊查询的范畴,但通配符显然功能更强。 SQL:
select * from example_index where name like '张%';
Python
# Wildcard filter on "name": "*" stands for any run of characters after "张".
name_pattern = {"wildcard": {"name": {"value": "张*"}}}
query = {"query": {"bool": {"filter": [name_pattern]}}}
response = es.search(index='example_index', size=100, body=query)
返回结果如下:
{
"_shards": {
"failed": 0,
"skipped": 0,
"successful": 1,
"total": 1
},
"hits": {
"hits": [
{
"_id": "lpOYO4ABj1R4dwhUv2qJ",
"_index": "example_index",
"_score": 0.0,
"_source": {
"address": "武当山",
"age": "90",
"create_time": "2022-4-18 14:59:34",
"modify_time": "2022-4-18 14:59:44",
"name": "张三丰",
"power": "95",
"sect": "武当派",
"sex": "m",
"skill": "太极"
}
},
{
"_id": "kZMpO4ABj1R4dwhUfWpA",
"_index": "example_index",
"_score": 0.0,
"_source": {
"address": "光明顶",
"age": "20",
"create_time": "2022-4-18 11:25:24",
"modify_time": "2022-4-18 11:25:46",
"name": "张无忌",
"power": "99",
"sect": "明教",
"sex": "m",
"skill": "九阳神功, 乾坤大挪移",
"父亲": "张翠山"
}
}
],
"max_score": 0.0,
"total": {
"relation": "eq",
"value": 2
}
},
"timed_out": false,
"took": 6
}
7 复合查询
前面的例子都是单个条件查询,在实际应用中,我们很有可能会过滤多个值或字段。先看一个简单的例子: SQL:
select * from example_index where sex = 'm' and sect = '武当派';
这样的多条件等值查询,就要借用到组合过滤器了,其查询语句是: Python
# Two term clauses under "must": a document has to satisfy both of them.
required_terms = [
    {"term": {field: {"value": wanted}}}
    for field, wanted in (("sex", "m"), ("sect", "武当派"))
]
query = {"query": {"bool": {"must": required_terms}}}
response = es.search(index='example_index', size=100, body=query)
这里也可以将must 变为filter ,这样可以不用计算score 。 返回结果如下:
{
"_shards": {
"failed": 0,
"skipped": 0,
"successful": 1,
"total": 1
},
"hits": {
"hits": [
{
"_id": "lpOYO4ABj1R4dwhUv2qJ",
"_index": "example_index",
"_score": 1.7385149,
"_source": {
"address": "武当山",
"age": "90",
"create_time": "2022-4-18 14:59:34",
"modify_time": "2022-4-18 14:59:44",
"name": "张三丰",
"power": "95",
"sect": "武当派",
"sex": "m",
"skill": "太极"
}
},
{
"_id": "l5OYO4ABj1R4dwhUv2qJ",
"_index": "example_index",
"_score": 1.7385149,
"_source": {
"address": "武当山",
"age": "40",
"create_time": "2022-4-18 15:02:08",
"modify_time": "2022-4-18 15:02:15",
"name": "宋远桥",
"power": "60",
"sect": "武当派",
"sex": "m",
"skill": "太极"
}
}
],
"max_score": 1.7385149,
"total": {
"relation": "eq",
"value": 2
}
},
"timed_out": false,
"took": 2
}
7.1 布尔查询
布尔过滤器(bool filter)属于复合过滤器(compound filter)的一种 ,可以接受多个其他过滤器作为参数,并将这些过滤器结合成各式各样的布尔(逻辑)组合。 bool 过滤器下可以有4种子条件,可以任选其中任意一个或多个。filter是比较特殊的,这里先不说。
{"bool":{"must":[],"should":[],"must_not":[],}}
- must:所有的语句都必须匹配,与 ‘=’ 等价。
- must_not:所有的语句都不能匹配,与 ‘!=’ 或 not in 等价。
- should:至少有n个语句要匹配,n由参数控制。
精度控制: 所有must 语句必须匹配,所有must_not 语句都必须不匹配,但有多少should 语句应该匹配呢?默认情况下,没有should 语句是必须匹配的,只有一个例外:那就是当没有must 和filter 语句的时候,至少有一个should 语句必须匹配。
我们可以通过minimum_should_match 参数控制需要匹配的should 语句的数量,它既可以是一个绝对的数字,又可以是个百分比:
# sex must be "f", and at least one of the two address clauses must match
# as well (minimum_should_match is "1").
must_clauses = [{"term": {"sex": {"value": "f"}}}]
should_clauses = [
    {"term": {"address": {"value": place}}}
    for place in ("峨眉山", "光明顶")
]
query = {
    "query": {
        "bool": {
            "must": must_clauses,
            "should": should_clauses,
            "minimum_should_match": "1",
        }
    }
}
response = es.search(index='example_index', size=100, body=query)
逻辑条件相当于A and (B or C) ,B 和C 至少满足一个条件。 返回信息如下:
{
"_shards": {
"failed": 0,
"skipped": 0,
"successful": 1,
"total": 1
},
"hits": {
"hits": [
{
"_id": "kpMpO4ABj1R4dwhUfWpA",
"_index": "example_index",
"_score": 2.500655,
"_source": {
"address": "峨眉山",
"age": "17",
"create_time": "2022-4-18 11:27:40",
"modify_time": "2022-4-18 11:27:48",
"name": "周芷若",
"power": "88",
"sect": "峨眉派",
"sex": "f",
"skill": "九阴真经"
}
}
],
"max_score": 2.500655,
"total": {
"relation": "eq",
"value": 1
}
},
"timed_out": false,
"took": 4
}
8. filter查询
在ES中,提供了query context 和filter context 两种搜索:
- query context:会对搜索结果进行相关性评分,可以理解为“文档与查询有多相关?”,分数越高,相关程度越高。
- filter context:不需要相关性算分,能够利用缓存来获得更好的性能。可以理解为“文档是否与查询条件匹配?”。不会计算分数,且往往会缓存来提升性能。
filter context 会作用于以下场景:
- 在 bool query 下的 filter 参数与 must_not 参数
- 在 constant_score 查询下的 filter 参数
- filter 聚合
query context 会作用于query ,bool 中的must 和should 。
8.1 单独使用filter
# Filter-only bool query: a single term clause on "sex" under "filter".
# Bound to `query`, like the other examples, so it can be passed to
# es.search(..., body=query).
query = {
    "query": {
        "bool": {
            "filter": [
                {
                    "term": {
                        "sex": {
                            "value": "m",
                        }
                    }
                }
            ]
        }
    }
}
单独使用时,filter与must基本一样,不同的是filter不计算评分,效率更高。
8.2 和must、must_not同级,相当于子查询
SQL:
select * from (select * from example_index where sect = '明教') a where sex = 'm';
python:
# "must" and "filter" side by side in one bool query: the sect clause sits
# under "must", the sex clause under "filter".
sect_clause = {"term": {"sect": {"value": "明教"}}}
sex_clause = {"term": {"sex": "m"}}
query = {
    "query": {
        "bool": {
            "must": [sect_clause],
            "filter": [sex_clause],
        }
    }
}
response = es.search(index='example_index', size=100, body=query)
返回结果如下:
{
"_shards": {
"failed": 0,
"skipped": 0,
"successful": 1,
"total": 1
},
"hits": {
"hits": [
{
"_id": "kZMpO4ABj1R4dwhUfWpA",
"_index": "example_index",
"_score": 1.1631508,
"_source": {
"address": "光明顶",
"age": "20",
"create_time": "2022-4-18 11:25:24",
"modify_time": "2022-4-18 11:25:46",
"name": "张无忌",
"power": "99",
"sect": "明教",
"sex": "m",
"skill": "九阳神功, 乾坤大挪移",
"父亲": "张翠山"
}
}
],
"max_score": 1.1631508,
"total": {
"relation": "eq",
"value": 1
}
},
"timed_out": false,
"took": 2
}
9. 聚合查询
9.1 最值、平均值、求和
查询最大年龄(最小年龄、平均年龄、年龄总和的写法相同,只需把聚合类型max 换成min、avg、sum)。 SQL:
select max(age) from example_index;
python:
# Metric aggregation named "max_age": the largest "age" value.
# NOTE(review): a max aggregation presumably needs "age" mapped as a numeric
# field -- confirm against the index mapping.
max_age_agg = {"max": {"field": "age"}}
query = {"aggregations": {"max_age": max_age_agg}}
response = es.search(index='example_index', size=0, body=query)
9.2 去重查询
查询一共有多少个门派。 SQL:
select count(distinct sect) from example_index;
python
# Cardinality aggregation: number of distinct "sect" values.
distinct_sects = {"cardinality": {"field": "sect"}}
query = {"aggregations": {"sect_count": distinct_sects}}
response = es.search(index='example_index', size=0, body=query)
返回结果如下:
{
"_shards": {
"failed": 0,
"skipped": 0,
"successful": 1,
"total": 1
},
"aggregations": {
"sect_count": {
"value": 4
}
},
"hits": {
"hits": [],
"max_score": null,
"total": {
"relation": "eq",
"value": 5
}
},
"timed_out": false,
"took": 2
}
9.3 单条件分组
查询每个门派的人数 SQL:
select sect,count(id) from example_index group by sect;
python
# Terms aggregation: one bucket per "sect" value, ordered by document count
# (descending) and then by key (ascending).
query = {
    "size": 0,
    "aggregations": {
        "sect_count": {
            "terms": {
                "field": "sect",
                "size": 10,
                "min_doc_count": 1,
                "shard_min_doc_count": 0,
                # A real boolean (serialized as JSON false), not the string 'false'.
                "show_term_doc_count_error": False,
                "order": [
                    {"_count": "desc"},
                    {"_key": "asc"},
                ],
            }
        }
    }
}
# Run the aggregation; size=0 is passed here as well as inside the body.
response = es.search(index='example_index', size=0, body=query)
返回结果:
{
"_shards": {
"failed": 0,
"skipped": 0,
"successful": 1,
"total": 1
},
"aggregations": {
"sect_count": {
"buckets": [
{
"doc_count": 2,
"key": "武当派"
},
{
"doc_count": 1,
"key": "峨眉派"
},
{
"doc_count": 1,
"key": "明教"
},
{
"doc_count": 1,
"key": "朝廷"
}
],
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0
}
},
"hits": {
"hits": [],
"max_score": null,
"total": {
"relation": "eq",
"value": 5
}
},
"timed_out": false,
"took": 2
}
9.4 多条件分组
查询每个门派各有多少个男性和女性。 SQL:
select sect,sex,count(id) from example_index group by sect,sex;
python
# Two-level grouping: bucket by "sect", then bucket each sect by "sex".
sex_breakdown = {"sex_count": {"terms": {"field": "sex", "size": 10}}}
query = {
    "aggregations": {
        "sect_count": {
            "terms": {"field": "sect", "size": 10},
            "aggregations": sex_breakdown,
        }
    }
}
返回结果:
{
"took" : 6,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 5,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
]
},
"aggregations" : {
"sect_count" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "武当派",
"doc_count" : 2,
"sex_count" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "m",
"doc_count" : 2
}
]
}
},
{
"key" : "峨眉派",
"doc_count" : 1,
"sex_count" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "f",
"doc_count" : 1
}
]
}
},
{
"key" : "明教",
"doc_count" : 1,
"sex_count" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "m",
"doc_count" : 1
}
]
}
},
{
"key" : "朝廷",
"doc_count" : 1,
"sex_count" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "f",
"doc_count" : 1
}
]
}
}
]
}
}
}
9.5 过滤聚合
前面所有聚合的例子请求都省略了 query ,整个请求只不过是一个聚合。这意味着我们对全部数据进行了聚合,但现实应用中,我们常常对特定范围的数据进行聚合,例如下例:
查询明教中的最大年龄。这涉及到聚合与条件查询一起使用。
SQL:
select max(age) from example_index where sect = '明教';
python:
# Restrict the documents with a term query, then aggregate over the matches.
# The field is "sect" itself: the index mapping declares it directly as a
# keyword and defines no "sect.keyword" sub-field, so a term query on
# "sect.keyword" would match nothing.
query = {
    "query": {
        "term": {
            "sect": {
                "value": "明教",
                "boost": 1.0,
            }
        }
    },
    "aggregations": {
        "max_age": {
            "max": {
                "field": "age",
            }
        }
    },
}
另外还有一些更复杂的查询例子。
案例:查询0-20,21-40,41-60,61以上的各有多少人。
SQL:
select
sum(case when age<=20 then 1 else 0 end) ageGroup1,
sum(case when age >20 and age <=40 then 1 else 0 end) ageGroup2,
sum(case when age >40 and age <=60 then 1 else 0 end) ageGroup3,
sum(case when age >60 and age <=200 then 1 else 0 end) ageGroup4
from example_index
python:
# Bucket documents by "age" into the four groups of the SQL above.
# Each range includes its "from" value and excludes its "to" value, so every
# upper bound is one past the last age in the bucket (0-21 covers ages 0..20)
# and adjacent buckets share a boundary, leaving no gaps.
# NOTE: a range aggregation needs "age" to be mapped as a numeric field.
query = {
    "size": 0,
    "aggregations": {
        "age_avg": {
            "range": {
                "field": "age",
                "ranges": [
                    {"from": 0.0, "to": 21.0},
                    {"from": 21.0, "to": 41.0},
                    {"from": 41.0, "to": 61.0},
                    {"from": 61.0, "to": 201.0},
                ],
                # Python boolean; serialized as JSON false.
                "keyed": False,
            }
        }
    },
}
|