ElasticSearch-2
ElasticSearch进阶篇
_Search检索文档
前面讲的那个是查询单条记录,可以理解为无condition的select
ES支持两种基本方式的检索:
- 通过uri+condition(检索参数)
- 通过uri+requestBody(请求体)
uri+condition
GET bank/_search?q=*&sort=account_number:asc
默认只返回10条数据(可以通过size参数调整)
uri+requestBody
GET /bank/_search
#请求体被称为QueryDSL
{
"query": { "match_all": {} },
"sort": [
{ "account_number": "asc" },
{ "balance":"desc"}
]
}
QueryDSL
最佳实战
GET /bank/_search
{
"query": {
"match_all": {}
},
"sort": [
{
"balance": { #给balance字段排序
"order": "desc"
}
}
],
"from": 0, #分页搜索
"size":5,
"_source": ["balance","age"]
}
query/match 匹配查询
精确查询
GET /bank/_search
{
"query": {
"match": {
"account_number": "594" #就相当于SQL中的"where xxx=xxx"
}
}
}
结果:
{
"took" : 9,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "bank",
"_type" : "account",
"_id" : "594",
"_score" : 1.0,
"_source" : {
"account_number" : 594,
"balance" : 28194,
"firstname" : "Golden",
"lastname" : "Donovan",
"age" : 26,
"gender" : "M",
"address" : "199 Jewel Street",
"employer" : "Organica",
"email" : "goldendonovan@organica.com",
"city" : "Macdona",
"state" : "RI"
}
}
]
}
}
模糊匹配(有分词效果)
最佳实践
和精确匹配的语法无差别
GET /bank/_search
{
"query": {
"match": {
"address": "mill lane"
}
}
}
结果:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 19,
"relation" : "eq"
},
"max_score" : 9.507477,
"hits" : [
{
"_index" : "bank",
"_type" : "account",
"_id" : "136",
"_score" : 9.507477,
"_source" : {
"account_number" : 136,
"balance" : 45801,
"firstname" : "Winnie",
"lastname" : "Holland",
"age" : 38,
"gender" : "M",
"address" : "198 Mill Lane",
"employer" : "Neteria",
"email" : "winnieholland@neteria.com",
"city" : "Urie",
"state" : "IL"
}
},
{
"_index" : "bank",
"_type" : "account",
"_id" : "970",
"_score" : 5.4032025,
"_source" : {
"account_number" : 970,
"balance" : 19648,
"firstname" : "Forbes",
"lastname" : "Wallace",
"age" : 28,
"gender" : "M",
"address" : "990 Mill Road",
"employer" : "Pheast",
"email" : "forbeswallace@pheast.com",
"city" : "Lopezo",
"state" : "AK"
}
},
{
"_index" : "bank",
"_type" : "account",
"_id" : "345",
"_score" : 5.4032025,
"_source" : {
"account_number" : 345,
"balance" : 9812,
"firstname" : "Parker",
"lastname" : "Hines",
"age" : 38,
"gender" : "M",
"address" : "715 Mill Avenue",
"employer" : "Baluba",
"email" : "parkerhines@baluba.com",
"city" : "Blackgum",
"state" : "KY"
}
},
{
"_index" : "bank",
"_type" : "account",
"_id" : "472",
"_score" : 5.4032025,
"_source" : {
"account_number" : 472,
"balance" : 25571,
"firstname" : "Lee",
"lastname" : "Long",
"age" : 32,
"gender" : "F",
"address" : "288 Mill Street",
"employer" : "Comverges",
"email" : "leelong@comverges.com",
"city" : "Movico",
"state" : "MT"
}
},
{
"_index" : "bank",
"_type" : "account",
"_id" : "1",
"_score" : 4.1042743,
"_source" : {
"account_number" : 1,
"balance" : 39225,
"firstname" : "Amber",
"lastname" : "Duke",
"age" : 32,
"gender" : "M",
"address" : "880 Holmes Lane",
"employer" : "Pyrami",
"email" : "amberduke@pyrami.com",
"city" : "Brogan",
"state" : "IL"
}
},
{
"_index" : "bank",
"_type" : "account",
"_id" : "70",
"_score" : 4.1042743,
"_source" : {
"account_number" : 70,
"balance" : 38172,
"firstname" : "Deidre",
"lastname" : "Thompson",
"age" : 33,
"gender" : "F",
"address" : "685 School Lane",
"employer" : "Netplode",
"email" : "deidrethompson@netplode.com",
"city" : "Chestnut",
"state" : "GA"
}
},
{
"_index" : "bank",
"_type" : "account",
"_id" : "556",
"_score" : 4.1042743,
"_source" : {
"account_number" : 556,
"balance" : 36420,
"firstname" : "Collier",
"lastname" : "Odonnell",
"age" : 35,
"gender" : "M",
"address" : "591 Nolans Lane",
"employer" : "Sultraxin",
"email" : "collierodonnell@sultraxin.com",
"city" : "Fulford",
"state" : "MD"
}
},
{
"_index" : "bank",
"_type" : "account",
"_id" : "568",
"_score" : 4.1042743,
"_source" : {
"account_number" : 568,
"balance" : 36628,
"firstname" : "Lesa",
"lastname" : "Maynard",
"age" : 29,
"gender" : "F",
"address" : "295 Whitty Lane",
"employer" : "Coash",
"email" : "lesamaynard@coash.com",
"city" : "Broadlands",
"state" : "VT"
}
},
{
"_index" : "bank",
"_type" : "account",
"_id" : "715",
"_score" : 4.1042743,
"_source" : {
"account_number" : 715,
"balance" : 23734,
"firstname" : "Tammi",
"lastname" : "Hodge",
"age" : 24,
"gender" : "M",
"address" : "865 Church Lane",
"employer" : "Netur",
"email" : "tammihodge@netur.com",
"city" : "Lacomb",
"state" : "KS"
}
},
{
"_index" : "bank",
"_type" : "account",
"_id" : "449",
"_score" : 4.1042743,
"_source" : {
"account_number" : 449,
"balance" : 41950,
"firstname" : "Barnett",
"lastname" : "Cantrell",
"age" : 39,
"gender" : "F",
"address" : "945 Bedell Lane",
"employer" : "Zentility",
"email" : "barnettcantrell@zentility.com",
"city" : "Swartzville",
"state" : "ND"
}
}
]
}
}
GET /bank/_search
{
"query": {
"match": {
"address": "milllane"
}
}
}
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 0,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
}
}
match:"milllane"和"mill lane"的查询结果差别如此之大,说明了什么?
说明ES内部做了分词匹配(倒排索引),mill lane会被拆分成 mill 和 lane 两个词,那么address中只要含有这两个单词其中一个或者两个都包含的文档就都会被返回(但是所得分数不同)。
query/match返回的结果按照得分从高到低进行排序。
短语匹配
短语匹配(不分词匹配)match_phrase
如果我们不想把"mill lane"拆开匹配,而是把它当成一个完整的短语(各个词必须按顺序相邻出现),那么就可以使用match_phrase。
GET /bank/_search
{
"query": {
"match_phrase": {
"address": "mill lane"
}
}
}
结果:
{
"took" : 11,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 9.507477,
"hits" : [
{
"_index" : "bank",
"_type" : "account",
"_id" : "136",
"_score" : 9.507477,
"_source" : {
"account_number" : 136,
"balance" : 45801,
"firstname" : "Winnie",
"lastname" : "Holland",
"age" : 38,
"gender" : "M",
"address" : "198 Mill Lane",
"employer" : "Neteria",
"email" : "winnieholland@neteria.com",
"city" : "Urie",
"state" : "IL"
}
}
]
}
}
多条件匹配
multi_match=n*match(多条件匹配相当于对多个字段进行模糊匹配)
GET /bank/_search
{
"query": {
"multi_match": {
"query": "mill brogan",
"fields": ["address","city"]
}
}
}
#对address、city属性进行分词查询"mill brogan"
Bool查询(复合查询)
- must:key必须匹配value
- must_not:key必须不匹配value
- should:不满足也可以,但是满足更好。**即,在同时存在must或filter的情况下,should并不会改变查询到的结果**,满足的话可以提高score,should改变的只是查询结果里面的score(如果bool中只有should,则至少要满足其中一个条件)
最佳实战
address和gender必须同时满足
GET /bank/_search
{
"query": {
"bool": {
"must": [
{"match": {
"address": "mill"
}},
{
"match": {
"gender": "M"
}
}
]
, "must_not": [
{"match": {
"age": "0"
}}
]
, "should": [
{ "match": {
"lastname": "Holland"
}
}
]
}
}
}
Filter过滤
在bool查询(复合查询)中还可以指定filter。
filter最大的特点就是:和must_not一样,filter并不贡献文档得分。仅仅起到过滤作用。
GET /bank/_search
{
"query": {
"bool": {
"filter": {
"range": {
"age": {
"gte": 18,
"lte": 30
}
}
}
}
}
}
结果:可以看到每个文档的得分都是0
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 498,
"relation" : "eq"
},
"max_score" : 0.0,
"hits" : [
{
"_index" : "bank",
"_type" : "account",
"_id" : "13",
"_score" : 0.0,
"_source" : {
"account_number" : 13,
"balance" : 32838,
"firstname" : "Nanette",
"lastname" : "Bates",
"age" : 28,
"gender" : "F",
"address" : "789 Madison Street",
"employer" : "Quility",
"email" : "nanettebates@quility.com",
"city" : "Nogal",
"state" : "VA"
}
},
{
"_index" : "bank",
"_type" : "account",
"_id" : "49",
"_score" : 0.0,
"_source" : {
"account_number" : 49,
"balance" : 29104,
"firstname" : "Fulton",
"lastname" : "Holt",
"age" : 23,
"gender" : "F",
"address" : "451 Humboldt Street",
"employer" : "Anocha",
"email" : "fultonholt@anocha.com",
"city" : "Sunriver",
"state" : "RI"
}
},
{
"_index" : "bank",
"_type" : "account",
"_id" : "63",
"_score" : 0.0,
"_source" : {
"account_number" : 63,
"balance" : 6077,
"firstname" : "Hughes",
"lastname" : "Owens",
"age" : 30,
"gender" : "F",
"address" : "510 Sedgwick Street",
"employer" : "Valpreal",
"email" : "hughesowens@valpreal.com",
"city" : "Guilford",
"state" : "KS"
}
},
{
"_index" : "bank",
"_type" : "account",
"_id" : "68",
"_score" : 0.0,
"_source" : {
"account_number" : 68,
"balance" : 44214,
"firstname" : "Hall",
"lastname" : "Key",
"age" : 25,
"gender" : "F",
"address" : "927 Bay Parkway",
"employer" : "Eventex",
"email" : "hallkey@eventex.com",
"city" : "Shawmut",
"state" : "CA"
}
},
{
"_index" : "bank",
"_type" : "account",
"_id" : "75",
"_score" : 0.0,
"_source" : {
"account_number" : 75,
"balance" : 40500,
"firstname" : "Sandoval",
"lastname" : "Kramer",
"age" : 22,
"gender" : "F",
"address" : "166 Irvington Place",
"employer" : "Overfork",
"email" : "sandovalkramer@overfork.com",
"city" : "Limestone",
"state" : "NH"
}
},
{
"_index" : "bank",
"_type" : "account",
"_id" : "87",
"_score" : 0.0,
"_source" : {
"account_number" : 87,
"balance" : 1133,
"firstname" : "Hewitt",
"lastname" : "Kidd",
"age" : 22,
"gender" : "M",
"address" : "446 Halleck Street",
"employer" : "Isologics",
"email" : "hewittkidd@isologics.com",
"city" : "Coalmont",
"state" : "ME"
}
},
{
"_index" : "bank",
"_type" : "account",
"_id" : "94",
"_score" : 0.0,
"_source" : {
"account_number" : 94,
"balance" : 41060,
"firstname" : "Brittany",
"lastname" : "Cabrera",
"age" : 30,
"gender" : "F",
"address" : "183 Kathleen Court",
"employer" : "Mixers",
"email" : "brittanycabrera@mixers.com",
"city" : "Cornucopia",
"state" : "AZ"
}
},
{
"_index" : "bank",
"_type" : "account",
"_id" : "102",
"_score" : 0.0,
"_source" : {
"account_number" : 102,
"balance" : 29712,
"firstname" : "Dena",
"lastname" : "Olson",
"age" : 27,
"gender" : "F",
"address" : "759 Newkirk Avenue",
"employer" : "Hinway",
"email" : "denaolson@hinway.com",
"city" : "Choctaw",
"state" : "NJ"
}
},
{
"_index" : "bank",
"_type" : "account",
"_id" : "107",
"_score" : 0.0,
"_source" : {
"account_number" : 107,
"balance" : 48844,
"firstname" : "Randi",
"lastname" : "Rich",
"age" : 28,
"gender" : "M",
"address" : "694 Jefferson Street",
"employer" : "Netplax",
"email" : "randirich@netplax.com",
"city" : "Bellfountain",
"state" : "SC"
}
},
{
"_index" : "bank",
"_type" : "account",
"_id" : "119",
"_score" : 0.0,
"_source" : {
"account_number" : 119,
"balance" : 49222,
"firstname" : "Laverne",
"lastname" : "Johnson",
"age" : 28,
"gender" : "F",
"address" : "302 Howard Place",
"employer" : "Senmei",
"email" : "lavernejohnson@senmei.com",
"city" : "Herlong",
"state" : "DC"
}
}
]
}
}
Term&.keyword
- 文本字段:match
- 非文本字段,精确匹配:term
文本字段的精确匹配
- match_phrase:将值作为一个整体,不再分词,但是查询结果中只要有这个短语即可
- match + 属性.keyword:精确匹配,就是=,没有花里胡哨的
aggs聚合
就是mysql中的分组聚合部分
##aggs检索后开始聚合 s代表不只一个聚合
##ageAggs聚合名
##terms(项)展示有多少个不同的值,每个值有多少条文档
##avg求平均
##size=0不看命中的结果 只看聚合结果
GET /bank/_search
{
"query": {
"match": {
"address": "mill"
}
},
"aggs": {
"ageAggs": {
"terms": {
"field": "age",
"size": 10
}
},
"ageAvg":{
"avg": {
"field": "age"
}
},
"banlanceAvg":{
"avg": {
"field": "balance"
}
}
},
"size": 0
}
##按照年龄聚合,并且请求这些年龄段的人的平均薪资
GET /bank/_search
{
"query": {
"match_all": {}
},
"aggs": {
"age_term": {
"terms": {
"field": "age",
"size": 100
},
"aggs": {
"balance_avg": {
"avg": {
"field": "balance"
}
}
}
}
},
"size": 0
}
##查出所有年龄分布,并且这些年龄段中M的平均薪资和F的平均薪资以及这个年龄段的总体平均薪资
GET /bank/_search
{
"query": {
"match_all": {}
},
"aggs": {
"age_term": {
"terms": {
"field": "age",
"size": 100
},
"aggs": {
"allbalance_avg":{
"avg": {
"field": "balance"
}
},
"gender_term": {
"terms": {
"field": "gender.keyword",
"size": 2
},
"aggs": {
"balance_avg": {
"avg": {
"field": "balance"
}
}
}
}
}
}
},
"size": 0
}
Mapping映射
ES中的映射=》mysql的数据类型
ElasticSearch7-去掉type(类型)概念 以后直接【索引–》文档】
关系型数据库中两个数据表是独立的,即使他们里面有相同名称的列也不影响使用,但ES中不是这样的。elasticsearch是基于Lucene开发的搜索引擎,而ES中不同type下名称相同的field最终在Lucene中的处理方式是一样的。
两个不同type下的两个user_name,在ES同一个索引下其实被认为是同一个field,你必须在两个不同的type中定义相同的field映射。否则,不同type中的相同字段名称就会在处理中出现冲突的情况,导致Lucene处理效率下降。 去掉type就是为了提高ES处理数据的效率。 Elasticsearch 7.x URL中的type参数为可选。比如,索引一个文档不再要求提供文档类型。
Elasticsearch 8.x 不再支持URL中的type参数。
解决: 将索引从多类型迁移到单类型,每种类型文档一个独立索引
将已存在的索引下的类型数据,全部迁移到指定位置即可。详见数据迁移
我们可以在创建索引的时候来指定该索引下文档的映射。如果不指定,ES会自动推测出映射。
创建索引时指定映射
PUT /my_index
{
"mappings": {
"properties": {
"age":{"type": "integer"}, #默认会推测成long
"email":{"type": "keyword"}, #keyword类型不会全文检索
"name":{"type": "text"} #text会被全文检索
}
}
}
添加新的字段映射
PUT /my_index/_mapping
PUT /my_index/_mapping
{
"properties": {
"employee-id": {
"type": "keyword",
"index": false # 该字段不能被检索
}
}
}
index:false 表明新的字段不能被检索,只是一个冗余字段。默认index都是为true的。
我们不能更新映射(只能添加新的字段), 如果一定要更新映射,必须创建新的索引,进行数据迁移
数据迁移
6.0以后写法
POST _reindex
{
"source":{
"index":"twitter"
},
"dest":{
"index":"new_twitters"
}
}
老版本写法
POST _reindex
{
"source":{
"index":"twitter",
"type":"twitter"
},
"dest":{
"index":"new_twitters"
}
}
创建新的索引(不需要全文检索的全部设置成keyword)
PUT /newbank
{
"mappings": {
"properties": {
"account_number": {
"type": "long"
},
"address": {
"type": "text"
},
"age": {
"type": "integer"
},
"balance": {
"type": "long"
},
"city": {
"type": "keyword"
},
"email": {
"type": "keyword"
},
"employer": {
"type": "keyword"
},
"firstname": {
"type": "text"
},
"gender": {
"type": "keyword"
},
"lastname": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"state": {
"type": "keyword"
}
}
}
}
然后调用上面代码进行迁移
分词
一个tokenizer(分词器)接收一个字符流,将之分割为独立的tokens(词元,通常是独立的单词),然后输出tokens流。
例如:whitespace tokenizer遇到空白字符时分割文本。它会将文本"Quick brown fox!"分割为[Quick,brown,fox!]
该tokenizer(分词器)还负责记录各个terms(词条)的顺序或position位置(用于phrase短语和word proximity词近邻查询),以及term(词条)所代表的原始word(单词)的start(起始)和end(结束)的character offsets(字符串偏移量)(用于高亮显示搜索的内容)。
elasticsearch提供了很多内置的分词器(标准分词器),可以用来构建custom analyzers(自定义分词器)。
关于分词器: https://www.elastic.co/guide/en/elasticsearch/reference/7.6/analysis.html
##标准分词器
POST _analyze
{
"analyzer": "standard",
"text": "we are family"
}
结果:
{
"tokens" : [
{
"token" : "we",
"start_offset" : 0,
"end_offset" : 2,
"type" : "<ALPHANUM>",
"position" : 0
},
{
"token" : "are",
"start_offset" : 3,
"end_offset" : 6,
"type" : "<ALPHANUM>",
"position" : 1
},
{
"token" : "family",
"start_offset" : 7,
"end_offset" : 13,
"type" : "<ALPHANUM>",
"position" : 2
}
]
}
但是它不能很好地对中文分词(标准分词器会把每个汉字都拆成一个单独的词)
POST _analyze
{
"analyzer": "standard",
"text": "男儿当自强"
}
{
"tokens" : [
{
"token" : "男",
"start_offset" : 0,
"end_offset" : 1,
"type" : "<IDEOGRAPHIC>",
"position" : 0
},
{
"token" : "儿",
"start_offset" : 1,
"end_offset" : 2,
"type" : "<IDEOGRAPHIC>",
"position" : 1
},
{
"token" : "当",
"start_offset" : 2,
"end_offset" : 3,
"type" : "<IDEOGRAPHIC>",
"position" : 2
},
{
"token" : "自",
"start_offset" : 3,
"end_offset" : 4,
"type" : "<IDEOGRAPHIC>",
"position" : 3
},
{
"token" : "强",
"start_offset" : 4,
"end_offset" : 5,
"type" : "<IDEOGRAPHIC>",
"position" : 4
}
]
}
so我们要
安装ik分词器
1、查看es的版本号
curl可以发送请求
[vagrant@localhost ~]$ curl http://localhost:9200
{
"name" : "66718a266132",
"cluster_name" : "elasticsearch",
"cluster_uuid" : "xhDnsLynQ3WyRdYmQk5xhQ",
"version" : {
"number" : "7.4.2",
"build_flavor" : "default",
"build_type" : "docker",
"build_hash" : "2f90bbf7b93631e52bafb59b3b049cb44ec25e96",
"build_date" : "2019-10-28T20:40:44.881551Z",
"build_snapshot" : false,
"lucene_version" : "8.2.0",
"minimum_wire_compatibility_version" : "6.8.0",
"minimum_index_compatibility_version" : "6.0.0-beta1"
},
"tagline" : "You Know, for Search"
}
2、进入到挂载es plugin的文件夹后
yum install wget
wget https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v7.4.2/elasticsearch-analysis-ik-7.4.2.zip
3、解压
unzip elasticsearch-analysis-ik-7.4.2.zip -d ik
chmod -R 777 ik
4、重启 es
docker restart elasticsearch
测试分词器
GET _analyze
{
"analyzer": "ik_smart",
"text": "我是中国人"
}
{
"tokens" : [
{
"token" : "我",
"start_offset" : 0,
"end_offset" : 1,
"type" : "CN_CHAR",
"position" : 0
},
{
"token" : "是",
"start_offset" : 1,
"end_offset" : 2,
"type" : "CN_CHAR",
"position" : 1
},
{
"token" : "中国人",
"start_offset" : 2,
"end_offset" : 5,
"type" : "CN_WORD",
"position" : 2
}
]
}
GET _analyze
{
"analyzer": "ik_max_word",
"text": "我是中国人"
}
结果:
{
"tokens" : [
{
"token" : "我",
"start_offset" : 0,
"end_offset" : 1,
"type" : "CN_CHAR",
"position" : 0
},
{
"token" : "是",
"start_offset" : 1,
"end_offset" : 2,
"type" : "CN_CHAR",
"position" : 1
},
{
"token" : "中国人",
"start_offset" : 2,
"end_offset" : 5,
"type" : "CN_WORD",
"position" : 2
},
{
"token" : "中国",
"start_offset" : 2,
"end_offset" : 4,
"type" : "CN_WORD",
"position" : 3
},
{
"token" : "国人",
"start_offset" : 3,
"end_offset" : 5,
"type" : "CN_WORD",
"position" : 4
}
]
}
自定义词库
安装nginx by docker
docker pull nginx:1.10
docker run -p 80:80 --name nginx -d nginx:1.10
mkdir -p /mydata/nginx
cd /mydata/nginx
docker container cp nginx:/etc/nginx ./conf
docker stop nginx
docker rm nginx
docker run -p 80:80 --name nginx \
-v /mydata/nginx/html:/usr/share/nginx/html \
-v /mydata/nginx/logs:/var/log/nginx \
-v /mydata/nginx/conf:/etc/nginx \
-d nginx:1.10
docker update nginx --restart=always
测试
cd /mydata/nginx/html/
vim index.html
随便写写
测试 http://192.168.56.10:80
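比如我们现在要把尚硅谷作为一个词:先在nginx挂载的html目录下创建es/fenci.txt(即/mydata/nginx/html/es/fenci.txt),每行写一个自定义词(例如:尚硅谷),再让ik通过下面配置的remote_ext_dict地址读取它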
修改/mydata/elasticsearch/plugins/ik/config/IKAnalyzer.cfg.xml
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties>
<comment>IK Analyzer 扩展配置</comment>
<entry key="ext_dict"></entry>
<entry key="ext_stopwords"></entry>
<entry key="remote_ext_dict">http://192.168.56.10/es/fenci.txt</entry>
</properties>
重启es容器
docker restart elasticsearch
SpringBoot + elasticsearch-Rest-Client
java操作es有两种方式:
- 9300端口:TCP(不推荐)
- spring-data-elasticsearch:transport-api.jar;
- springboot版本不同,transport-api.jar不同,不能适配es版本
- 7.x已经不建议使用,8以后就要废弃
- 9200端口:HTTP(√)
- 有诸多包(其实只要是可以发送http请求的包都可以)
- jestClient: 非官方,更新慢;
- RestTemplate:模拟HTTP请求,ES很多操作需要自己封装,麻烦;
- HttpClient:同上;
Elasticsearch-Rest-Client :官方RestClient,封装了ES操作,API层次分明,上手简单;
开整~
创建gulimall-search module 选择web依赖 但是不要选择es,原因见上
1、导入依赖(注意版本)
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-high-level-client</artifactId>
<version>7.4.2</version>
</dependency>
Spring Boot父工程默认管理的elasticsearch版本不是7.4.2,需要在pom的properties中手动覆盖
<properties>
<java.version>1.8</java.version>
<elasticsearch.version>7.4.2</elasticsearch.version>
</properties>
2、导入es的配置
请求设置项(RequestOptions):如果es添加了安全访问规则,访问es需要添加一个安全头,这个时候我们就可以通过requestOptions来设置
并且建议将其设为单例的
package config;
import org.apache.http.HttpHost;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestClientBuilder;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
 * Spring configuration that exposes the Elasticsearch high-level REST client.
 */
@Configuration
public class GuliEsConfig {

    /**
     * Request options shared by every call made through the client.
     * Built from {@code RequestOptions.DEFAULT} without any extra settings.
     */
    public static final RequestOptions COMMON_OPTIONS = RequestOptions.DEFAULT.toBuilder().build();

    /**
     * Creates the client bean pointing at the single node 192.168.56.10:9200 over HTTP.
     *
     * @return a new {@link RestHighLevelClient}
     */
    @Bean
    public RestHighLevelClient esRestClient() {
        HttpHost esNode = new HttpHost("192.168.56.10", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(esNode));
    }
}
插入数据
/**
 * Stores one User document under id "1" in the "users" index and prints the response.
 * NOTE(review): {@code client} is not declared in this snippet; presumably it is the
 * RestHighLevelClient injected into the enclosing test class.
 */
@Test
public void indexData() throws IOException {
    // Build the object that becomes the document body.
    User person = new User();
    person.setName("张三");
    person.setAge(20);
    person.setGender("男");

    // Target index and document id, with the object serialized to JSON as the source.
    IndexRequest request = new IndexRequest("users");
    request.id("1");
    request.source(JSON.toJSONString(person), XContentType.JSON);

    IndexResponse response = client.index(request, GuliEsConfig.COMMON_OPTIONS);
    System.out.println(response);
}
结果
IndexResponse[index=users,type=_doc,id=1,version=1,result=created,seqNo=0,primaryTerm=1,shards={"total":2,"successful":1,"failed":0}]
查询数据
最佳实践
/**
 * Runs a match query for "mill" on the address field of the "bank" index,
 * printing both the generated request body and the response.
 */
@Test
public void find() throws IOException {
    // Describe the query first so it can be printed before it is sent.
    SearchSourceBuilder source = new SearchSourceBuilder();
    source.query(QueryBuilders.matchQuery("address","mill"));
    System.out.println("请求参数:"+source);

    // Bind the query to the index to be searched.
    SearchRequest request = new SearchRequest();
    request.indices("bank");
    request.source(source);

    SearchResponse result = client.search(request, GuliEsConfig.COMMON_OPTIONS);
    System.out.println("查询结果:"+result);
}
请求参数: {
"query": {
"match": {
"address": {
"query": "mill",
"operator": "OR",
"prefix_length": 0,
"max_expansions": 50,
"fuzzy_transpositions": true,
"lenient": false,
"zero_terms_query": "NONE",
"auto_generate_synonyms_phrase_query": true,
"boost": 1.0
}
}
}
}
查询结果: {
"took": 6,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 4,
"relation": "eq"
},
"max_score": 5.4032025,
"hits": [{
"_index": "bank",
"_type": "account",
"_id": "970",
"_score": 5.4032025,
"_source": {
"account_number": 970,
"balance": 19648,
"firstname": "Forbes",
"lastname": "Wallace",
"age": 28,
"gender": "M",
"address": "990 Mill Road",
"employer": "Pheast",
"email": "forbeswallace@pheast.com",
"city": "Lopezo",
"state": "AK"
}
}, {
"_index": "bank",
"_type": "account",
"_id": "136",
"_score": 5.4032025,
"_source": {
"account_number": 136,
"balance": 45801,
"firstname": "Winnie",
"lastname": "Holland",
"age": 38,
"gender": "M",
"address": "198 Mill Lane",
"employer": "Neteria",
"email": "winnieholland@neteria.com",
"city": "Urie",
"state": "IL"
}
}, {
"_index": "bank",
"_type": "account",
"_id": "345",
"_score": 5.4032025,
"_source": {
"account_number": 345,
"balance": 9812,
"firstname": "Parker",
"lastname": "Hines",
"age": 38,
"gender": "M",
"address": "715 Mill Avenue",
"employer": "Baluba",
"email": "parkerhines@baluba.com",
"city": "Blackgum",
"state": "KY"
}
}, {
"_index": "bank",
"_type": "account",
"_id": "472",
"_score": 5.4032025,
"_source": {
"account_number": 472,
"balance": 25571,
"firstname": "Lee",
"lastname": "Long",
"age": 32,
"gender": "F",
"address": "288 Mill Street",
"employer": "Comverges",
"email": "leelong@comverges.com",
"city": "Movico",
"state": "MT"
}
}]
}
}
/**
 * Same match query as above, plus two aggregations over the matching documents:
 * "agg1" (terms on age, at most 10 buckets) and "agg2" (average of balance).
 * Prints the generated request body and the response.
 */
@Test
public void find() throws IOException {
    // Query and aggregations are chained on one builder; each call returns the builder itself.
    SearchSourceBuilder source = new SearchSourceBuilder()
            .query(QueryBuilders.matchQuery("address","mill"))
            .aggregation(AggregationBuilders.terms("agg1").field("age").size(10))
            .aggregation(AggregationBuilders.avg("agg2").field("balance"));
    System.out.println("请求参数:"+source);

    SearchRequest request = new SearchRequest();
    request.indices("bank");
    request.source(source);

    SearchResponse result = client.search(request, GuliEsConfig.COMMON_OPTIONS);
    System.out.println("查询结果:"+result);
}
请求参数: {
"query": {
"match": {
"address": {
"query": "mill"
}
}
},
"aggregations": {
"agg1": {
"terms": {
"field": "age",
"size": 10
}
},
"agg2": {
"avg": {
"field": "balance"
}
}
}
}
查询结果: {
"took": 7,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 4,
"relation": "eq"
},
"max_score": 5.4032025,
"hits": [{
"_index": "bank",
"_type": "account",
"_id": "970",
"_score": 5.4032025,
"_source": {
"account_number": 970,
"balance": 19648,
"firstname": "Forbes",
"lastname": "Wallace",
"age": 28,
"gender": "M",
"address": "990 Mill Road",
"employer": "Pheast",
"email": "forbeswallace@pheast.com",
"city": "Lopezo",
"state": "AK"
}
}, {
"_index": "bank",
"_type": "account",
"_id": "136",
"_score": 5.4032025,
"_source": {
"account_number": 136,
"balance": 45801,
"firstname": "Winnie",
"lastname": "Holland",
"age": 38,
"gender": "M",
"address": "198 Mill Lane",
"employer": "Neteria",
"email": "winnieholland@neteria.com",
"city": "Urie",
"state": "IL"
}
}, {
"_index": "bank",
"_type": "account",
"_id": "345",
"_score": 5.4032025,
"_source": {
"account_number": 345,
"balance": 9812,
"firstname": "Parker",
"lastname": "Hines",
"age": 38,
"gender": "M",
"address": "715 Mill Avenue",
"employer": "Baluba",
"email": "parkerhines@baluba.com",
"city": "Blackgum",
"state": "KY"
}
}, {
"_index": "bank",
"_type": "account",
"_id": "472",
"_score": 5.4032025,
"_source": {
"account_number": 472,
"balance": 25571,
"firstname": "Lee",
"lastname": "Long",
"age": 32,
"gender": "F",
"address": "288 Mill Street",
"employer": "Comverges",
"email": "leelong@comverges.com",
"city": "Movico",
"state": "MT"
}
}]
},
聚合的结果
"aggregations": {
"avg#agg2": {
"value": 25208.0
},
"lterms#agg1": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": 38,
"doc_count": 2
}, {
"key": 28,
"doc_count": 1
}, {
"key": 32,
"doc_count": 1
}]
}
}
}
转换Bean
SearchHits hits = response.getHits();
SearchHit[] hits1 = hits.getHits();
for (SearchHit hit : hits1) {
String sourceAsString = hit.getSourceAsString();
Account account = JSON.parseObject(sourceAsString, Account.class);
System.out.println(account);
}
分析信息
// Read the aggregation results back from the search response.
Aggregations aggregations = response.getAggregations();
// "agg1" is the terms aggregation on age. The original looked up "agg2", which is the
// avg aggregation and cannot be read as Terms (it fails with ClassCastException).
Terms ageTerms = aggregations.get("agg1");
for (Terms.Bucket bucket : ageTerms.getBuckets()) {
    // One bucket per distinct age: print the age and how many documents fall into it.
    System.out.println(bucket.getKeyAsString() + ": " + bucket.getDocCount());
}
"account_number": 345,
"balance": 9812,
"firstname": "Parker",
"lastname": "Hines",
"age": 38,
"gender": "M",
"address": "715 Mill Avenue",
"employer": "Baluba",
"email": "parkerhines@baluba.com",
"city": "Blackgum",
"state": "KY"
}
}, {
"_index": "bank",
"_type": "account",
"_id": "472",
"_score": 5.4032025,
"_source": {
"account_number": 472,
"balance": 25571,
"firstname": "Lee",
"lastname": "Long",
"age": 32,
"gender": "F",
"address": "288 Mill Street",
"employer": "Comverges",
"email": "leelong@comverges.com",
"city": "Movico",
"state": "MT"
}
}]
},
聚合的结果
"aggregations": {
"avg#agg2": {
"value": 25208.0
},
"lterms#agg1": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": 38,
"doc_count": 2
}, {
"key": 28,
"doc_count": 1
}, {
"key": 32,
"doc_count": 1
}]
}
}
}
**转换Bean**
```java
//获取大hits
SearchHits hits = response.getHits();
//获取每个小hits
SearchHit[] hits1 = hits.getHits();
for (SearchHit hit : hits1) {
String sourceAsString = hit.getSourceAsString();
//json==>obj
Account account = JSON.parseObject(sourceAsString, Account.class);
System.out.println(account);
}
分析信息
// Read the aggregation results back from the search response.
Aggregations aggregations = response.getAggregations();
// "agg1" is the terms aggregation on age. The original looked up "agg2", which is the
// avg aggregation and cannot be read as Terms (it fails with ClassCastException).
Terms ageTerms = aggregations.get("agg1");
for (Terms.Bucket bucket : ageTerms.getBuckets()) {
    // One bucket per distinct age: print the age and how many documents fall into it.
    System.out.println(bucket.getKeyAsString() + ": " + bucket.getDocCount());
}
|