springboot集成elasticsearch全文搜索高亮显示实践

本文案例：在英文文章索引中搜索包含指定单词的文章，并对包含该单词的句子进行高亮显示。主要介绍如何在springboot中集成elasticsearch，以及常用api。

引入依赖

<!-- Inherit from the Spring Boot 2.3.0.RELEASE starter parent POM. -->
<parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.3.0.RELEASE</version>
        <relativePath/> <!-- empty: resolve the parent from the repository rather than a local path -->
</parent>
<dependencies>
		<!-- Spring Data Elasticsearch starter; no <version> is declared here, so it has to be supplied by the parent. -->
		<dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
        </dependency>
</dependencies>

配置连接

import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.data.elasticsearch.client.ClientConfiguration;
import org.springframework.data.elasticsearch.client.RestClients;
import org.springframework.data.elasticsearch.config.AbstractElasticsearchConfiguration;

/**
 * Spring configuration that exposes the Elasticsearch {@link RestHighLevelClient} bean.
 *
 * <p>The client is built for the single node address {@code 127.0.0.1:9200}.
 *
 * @author liu
 */
@Configuration
public class ElasticSearchClient extends AbstractElasticsearchConfiguration {

    /**
     * Builds the high-level REST client from a {@link ClientConfiguration}.
     *
     * @return the REST client connected to {@code 127.0.0.1:9200}
     */
    @Bean
    @Override
    public RestHighLevelClient elasticsearchClient() {
        return RestClients
                .create(ClientConfiguration.builder().connectedTo("127.0.0.1:9200").build())
                .rest();
    }
}

添加数据（添加索引的文档）

索引相当于数据库，一个文档相当于一条具体的数据，表对应的概念是类型（type），但是elasticsearch最新版已经取消type了。

添加index以及document的代码如下：

/**
 * Article entity mapped to the Elasticsearch {@code article} index.
 *
 * <p>String fields are mapped as analyzed {@code text}. The {@code Long} fields are mapped as
 * {@code long} so that the mapping type matches the Java type and the values are indexed as
 * numbers rather than as analyzed text.
 *
 * <p>NOTE(review): an index that was already created with the previous {@code text} mapping for
 * the numeric fields will presumably need to be recreated or reindexed before the new mapping
 * takes effect — confirm against the target cluster.
 */
@Data
@Document(indexName = "article") // index that holds these documents
public class BdArticle implements Serializable {

    private static final long serialVersionUID = 1L;

    // @Id marks the Elasticsearch document id; @TableId is kept as in the original
    // (presumably the relational primary-key mapping — verify against the persistence layer).
    @TableId(value = "id", type = IdType.AUTO)
    @Id
    private Long id;

    // @Field declares the field type used in the Elasticsearch mapping.
    @Field(type = FieldType.Text)
    private String title;

    @Field(type = FieldType.Text)
    private String photo;

    // Article body; this is the field that is searched and highlighted.
    @Field(type = FieldType.Text)
    private String context;

    // Numeric fields: mapped as long, not text.
    @Field(type = FieldType.Long)
    private Long wordCount;

    @Field(type = FieldType.Long)
    private Long createTime;

    @Field(type = FieldType.Long)
    private Long updateTime;
}

也有用于排除字段的注解，在实体类映射时可以配置的地方很多，大家可以补充。

/**
 * {@code ArticleService} implementation that stores articles through
 * {@code ElasticsearchOperations}.
 */
@Service
public class ArticleServiceImpl implements ArticleService {

    /** Spring Data Elasticsearch operations used to write the document. */
    @Autowired
    private ElasticsearchOperations elasticsearchOperations;

    /**
     * Saves the given article via {@code ElasticsearchOperations.save}.
     *
     * @param bdArticle the article to store
     */
    @Transactional(rollbackFor = Exception.class)
    @Override
    public void addBdArticle(BdArticle bdArticle) {
        // NOTE(review): the only work done here is an Elasticsearch write; whether the
        // surrounding Spring transaction can roll that write back should be confirmed.
        this.elasticsearchOperations.save(bdArticle);
    }
}

除了映射实体类也可以通过api加入单独的字段。

查询数据

@SpringBootTest(classes = ReciteWordsApplication.class)
@RunWith(SpringJUnit4ClassRunner.class)
public class ArticleELKTest {
    @Autowired
    private BdArticleService bdArticleService;
    @Autowired
    private ElasticsearchOperations elasticsearchOperations;
    @Autowired
    private RestHighLevelClient restHighLevelClient;


    @Test
    public void searchWordInArticles() throws IOException {
        String wordText = "is";
        // 拿到要查询的索引
        SearchRequest searchRequest = new SearchRequest("article");

        // 构建查询条件
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
		//指定返回字段
        String[] fields = {"id","title","context"};
        sourceBuilder.fetchSource(fields, Strings.EMPTY_ARRAY);
       //match查询，可选 sourceBuilder.query(QueryBuilders.matchQuery("context",wordText));
		//高亮设置
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        //设置高亮渲染
        String preTag = "<span style='color:red;font-weight:bold'>";
        String postTag = "</span>";
        HighlightBuilder.Field highlightContext = new HighlightBuilder.Field("context")
                .numOfFragments(1)//没一个document中返回的条数（因为一个document中就可以有好多个关键词）
                .preTags(preTag)
                .postTags(postTag);
        highlightBuilder.field(highlightContext);
		//设置返回document条数
		sourceBuilder.size(3);
        sourceBuilder.highlighter(highlightBuilder);
        searchRequest.source(sourceBuilder);
        // 进行查询
        SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        //对返回值进行处理
        org.elasticsearch.search.SearchHit[] hits = searchResponse.getHits().getHits();
        if(hits==null||hits.length==0){
            return null;
        }
        List<BdArticle> list = new ArrayList<>();
        for (SearchHit hit : hits) {
            Map<String, HighlightField> highlightFields = hit.getHighlightFields();
            Map<String, Object> sourceAsMap = hit.getSourceAsMap();
            BdArticle bdArticle = new BdArticle();
            bdArticle.setId(Long.parseLong(String.valueOf(sourceAsMap.get("id"))));
            bdArticle.setTitle(String.valueOf(sourceAsMap.get("title")));
            bdArticle.setContext(highlightFields.get("context").getFragments()[0].string());
            list.add(bdArticle);
        }
        System.out.println(list);
    }
}

对应的DSL查询语句以及查询结果：

{
    "query":{
        "match":{
            "context":"is"
        }
    },
    "_source":["id"],
    "highlight":{
        "fields":{
            "context":{
                "number_of_fragments":1
            }
        }
    }
}

res：重点看hits

{
    "took": 20,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 6,
            "relation": "eq"
        },
        "max_score": 0.14691809,
        "hits": [
            {
                "_index": "article",
                "_type": "_doc",
                "_id": "8",
                "_score": 0.14691809,
                "_source": {
                    "id": 8
                },
                "highlight": {
                    "context": [
                        "Classified advertising <em>is</em> that advertising which <em>is</em> grouped in certain sections of the paper and <em>is</em> thus"
                    ]
                }
            },
            {
                "_index": "article",
                "_type": "_doc",
                "_id": "3",
                "_score": 0.14463511,
                "_source": {
                    "id": 3
                },
                "highlight": {
                    "context": [
                        "There <em>is</em> considerable sentiment about the “corruption” of women’s language—which of course <em>is</em> viewed"
                    ]
                }
            },
            {
                "_index": "article",
                "_type": "_doc",
                "_id": "2",
                "_score": 0.1358716,
                "_source": {
                    "id": 2
                },
                "highlight": {
                    "context": [
                        "Obviously it <em>is</em> not of ours.”"
                    ]
                }
            },
            {
                "_index": "article",
                "_type": "_doc",
                "_id": "1",
                "_score": 0.12783799,
                "_source": {
                    "id": 1
                },
                "highlight": {
                    "context": [
                        "Today it <em>is</em> a giant advertising company, worth $100 billion."
                    ]
                }
            },
            {
                "_index": "article",
                "_type": "_doc",
                "_id": "4",
                "_score": 0.11044833,
                "_source": {
                    "id": 4
                },
                "highlight": {
                    "context": [
                        "\\n The challenge <em>is</em> particularly evident in the work-place."
                    ]
                }
            },
            {
                "_index": "article",
                "_type": "_doc",
                "_id": "5",
                "_score": 0.10031616,
                "_source": {
                    "id": 5
                },
                "highlight": {
                    "context": [
                        "Like most people, I’ve long understood that I will be judged by my occupation, that my profession <em>is</em>"
                    ]
                }
            }
        ]
    }
}