ElasticSearch学习
ElasticSearch用于数据检索,效率非常高,尤其是在大数据环境下,所以非常有必要学习!
1. 安装
这里我使用阿里云服务器,并且采用Docker 安装ES
安装elasticsearch
# 1.拉取镜像
docker pull elasticsearch:7.7.1
# 2.生成容器
docker run -d -p 9300:9300 -p 9200:9200 --name es -e ES_JAVA_OPTS="-Xms128m -Xmx128m" -e "discovery.type=single-node" -v /root/es/plugins:/usr/share/elasticsearch/plugins -v /root/es/data:/usr/share/elasticsearch/data elasticsearch:7.7.1
安装kibana
# 1.下载kibana镜像到本地
docker pull kibana:7.7.1
# 2.启动kibana容器
docker run -d --name kibana -e ELASTICSEARCH_HOSTS=http://47.101.52.63:9200 -p 5601:5601 kibana:7.7.1
安装elasticsearch-head
# 1.下载镜像
docker pull mobz/elasticsearch-head:5
# 2.生成容器
docker run -d -p 9100:9100 --name es-head docker.io/mobz/elasticsearch-head:5
# 3.在这里可能会出现跨域拒绝访问问题
进入elasticsearch容器内部,修改配置文件elasticsearch.yml
docker ps -a #拿到运行容器elasticsearch 的 id
docker exec -it ******(容器id) /bin/bash
cd ./config
vi elasticsearch.yml
在elasticsearch.yml中添加:
http.cors.enabled: true
http.cors.allow-origin: "*"
然后重启容器
docker restart es
安装IK分词器
# 1.下载对应版本的IK分词器
wget https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v7.7.1/elasticsearch-analysis-ik-7.7.1.zip
# 2.解压到plugins/elasticsearch文件夹中
yum install -y unzip #下载unzip
unzip -d plugins/elasticsearch elasticsearch-analysis-ik-7.7.1.zip
# 3.添加自定义扩展词和停用词
cd plugins/elasticsearch/config
vim IKAnalyzer.cfg.xml
<properties>
<comment>IK Analyzer 扩展配置</comment>
<!--用户可以在这里配置自己的扩展字典 -->
<entry key="ext_dict">ext_dict.dic</entry>
<!--用户可以在这里配置自己的扩展停止词字典-->
<entry key="ext_stopwords">ext_stopwords.dic</entry>
</properties>
# 4.在ik分词器目录下config目录中创建ext_dict.dic文件 编码一定要为UTF-8才能生效
vim ext_dict.dic 加入扩展词即可
# 5. 在ik分词器目录下config目录中创建ext_stopwords.dic文件
vim ext_stopwords.dic 加入停用词即可
# 6.将此容器提交成为一个新的镜像
docker commit -a="zk" -m="with IKAnalyzer" b35d35f72b8d zk/elasticsearch:6.8.2
# 7.使用新生成的这个es镜像创建容器,并挂载数据卷
docker run -d --name es -p 9200:9200 -p 9300:9300 -e ES_JAVA_OPTS="-Xms128m -Xmx128m" -v /usr/local/IKAnalyzer:/usr/share/elasticsearch/plugins/elasticsearch/config zk/elasticsearch:6.8.2
2.项目实战(基于es的仿京东搜索)
package com.ittao.utils;
import com.ittao.entity.Content;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
/**
 * Scrapes product listings from the JD.com search result page.
 */
public class HtmlParseUtil {

    /** Timeout, in milliseconds, for fetching and parsing the search page. */
    private static final int TIMEOUT_MILLIS = 30000;

    /**
     * Fetches the JD search page for {@code keyword} and builds one {@link Content} for every
     * {@code <li>} found inside the element whose id is {@code J_goodsList}.
     *
     * @param keyword search term, must not be {@code null}; it is URL-encoded as UTF-8 before
     *                being placed in the query string
     * @return the parsed products; an empty list when the page has no {@code J_goodsList} element
     * @throws IOException if the page cannot be fetched or parsed
     */
    public static List<Content> parseJd(String keyword) throws IOException {
        // Encode the keyword so spaces, '&', '#', '+' or non-ASCII text cannot corrupt the query string.
        // The fully qualified name avoids needing a new import in this file.
        String encodedKeyword = java.net.URLEncoder.encode(keyword, "UTF-8");
        String url = "https://search.jd.com/Search?keyword=" + encodedKeyword + "&enc=utf-8";
        Document document = Jsoup.parse(new URL(url), TIMEOUT_MILLIS);

        List<Content> contents = new ArrayList<>();
        Element goodsList = document.getElementById("J_goodsList");
        if (goodsList == null) {
            // No result container in the returned page: report "no products" instead of failing with an NPE.
            return contents;
        }

        for (Element item : goodsList.getElementsByTag("li")) {
            // NOTE(review): if the site lazy-loads images, "src" may be empty here - confirm against the live page.
            String img = item.getElementsByTag("img").eq(0).attr("src");
            String price = item.getElementsByClass("p-price").text();
            String title = item.getElementsByClass("p-name").eq(0).text();

            Content content = new Content();
            content.setTitle(title);
            content.setImg(img);
            content.setPrice(price);
            contents.add(content);
        }
        return contents;
    }
}
前后端分离实现
后端实现
整体结构
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- Inherit dependency and plugin management from the Spring Boot 2.3.0.RELEASE parent. -->
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>2.3.0.RELEASE</version>
<relativePath/>
</parent>
<groupId>com.ittao</groupId>
<artifactId>elasticsearch_study</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>elasticsearch_study</name>
<description>Demo project for Spring Boot</description>
<properties>
<java.version>1.8</java.version>
<!-- Pins the Elasticsearch client version; presumably chosen to match the server version in use - confirm. -->
<elasticsearch.version>7.7.1</elasticsearch.version>
</properties>
<dependencies>
<!-- Elasticsearch client support. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-elasticsearch</artifactId>
</dependency>
<!-- Spring MVC for the REST endpoints. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-devtools</artifactId>
<scope>runtime</scope>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-configuration-processor</artifactId>
<optional>true</optional>
</dependency>
<!-- Lombok annotations used by the entity classes. -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<optional>true</optional>
</dependency>
<!-- jsoup: fetches and parses the HTML search page. -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.2</version>
</dependency>
<!-- fastjson: serializes entities to JSON before indexing. -->
<!-- NOTE(review): 1.2.61 is an old release; check current security advisories before reusing this version. -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.61</version>
</dependency>
<!-- Test starter with the JUnit vintage engine excluded. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>org.junit.vintage</groupId>
<artifactId>junit-vintage-engine</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
</plugin>
</plugins>
</build>
</project>
config
package com.ittao.config;
import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
 * Spring configuration that exposes the Elasticsearch high-level REST client as a bean.
 */
@Configuration
public class ElasticsearchConfig {

    /**
     * Creates the client, pointed at a single Elasticsearch node reached over plain HTTP.
     *
     * @return a new {@link RestHighLevelClient} for the configured node
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        HttpHost node = new HttpHost("47.101.52.63", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(node));
    }
}
entity
package com.ittao.entity;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * One product entry, holding its title, image reference and price as text.
 * Lombok generates the accessors, equals/hashCode/toString, and both the
 * no-argument and all-arguments constructors.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class Content {
/** Product title. */
private String title;
/** Image reference for the product (stored as a string). */
private String img;
/** Price, kept as the displayed text rather than a number. */
private String price;
}
package com.ittao.entity;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.experimental.Accessors;
import org.springframework.stereotype.Component;
/**
 * Simple bean holding a name and an age.
 * Lombok generates the accessors and constructors; {@code @Accessors(chain = true)}
 * makes the generated setters return {@code this} so calls can be chained.
 * NOTE(review): the class is also registered as a Spring component although it
 * looks like a plain data holder - confirm this is intended.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
@Accessors(chain = true)
@Component
public class User {
/** User name. */
private String name;
/** User age. */
private int age;
}
service
package com.ittao.service.impl;
import com.alibaba.fastjson.JSON;
import com.ittao.entity.Content;
import com.ittao.service.ContentService;
import com.ittao.utils.HtmlParseUtil;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.FuzzyQueryBuilder;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
/**
 * {@link ContentService} implementation that stores scraped products in Elasticsearch
 * and runs highlighted, paged searches over them.
 */
@Service
public class ContentServiceImpl implements ContentService {

    /** Index that holds the scraped products. */
    private static final String INDEX_NAME = "jd_goods";

    /** Document field that is both searched and highlighted. */
    private static final String TITLE_FIELD = "title";

    @Autowired
    @Qualifier("restHighLevelClient")
    private RestHighLevelClient client;

    /**
     * Scrapes the products for {@code keyword} and indexes them with a single bulk request.
     *
     * @param keyword search term handed to {@link HtmlParseUtil#parseJd(String)}
     * @return {@code true} when every document was indexed; {@code false} when nothing was
     *         scraped or at least one bulk item failed
     * @throws IOException if scraping or the bulk call fails
     */
    @Override
    public boolean addToEs(String keyword) throws IOException {
        List<Content> contentList = HtmlParseUtil.parseJd(keyword);
        if (contentList.isEmpty()) {
            // A bulk request without any action is rejected by request validation;
            // nothing was indexed, so report failure instead of throwing.
            return false;
        }

        BulkRequest request = new BulkRequest();
        request.timeout(new TimeValue(2, TimeUnit.MINUTES));
        for (Content content : contentList) {
            request.add(new IndexRequest(INDEX_NAME)
                    .source(JSON.toJSONString(content), XContentType.JSON));
        }

        BulkResponse response = client.bulk(request, RequestOptions.DEFAULT);
        return !response.hasFailures();
    }

    /**
     * Runs a match query on the title field and returns one page of hits, with the title
     * replaced by its highlighted form whenever a highlight is available.
     *
     * @param keyword  text to match against the title
     * @param pageNo   1-based page number; values below 1 are treated as 1
     * @param pageSize number of hits per page
     * @return the source map of every hit on the requested page
     * @throws IOException if the search call fails
     */
    @Override
    public List<Map<String, Object>> searchPage(String keyword, int pageNo, int pageSize) throws IOException {
        int page = Math.max(pageNo, 1);

        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        // "from" is the offset of the first hit, not a page number: page 1 starts at offset 0.
        sourceBuilder.from((page - 1) * pageSize);
        sourceBuilder.size(pageSize);
        sourceBuilder.query(QueryBuilders.matchQuery(TITLE_FIELD, keyword));
        sourceBuilder.timeout(new TimeValue(1, TimeUnit.MINUTES));

        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.field(TITLE_FIELD);
        highlightBuilder.requireFieldMatch(true);
        highlightBuilder.preTags("<span style='color:red'>");
        highlightBuilder.postTags("</span>");
        sourceBuilder.highlighter(highlightBuilder);

        SearchRequest request = new SearchRequest(INDEX_NAME);
        request.source(sourceBuilder);
        SearchResponse response = client.search(request, RequestOptions.DEFAULT);

        List<Map<String, Object>> results = new ArrayList<>();
        for (SearchHit hit : response.getHits().getHits()) {
            Map<String, Object> source = hit.getSourceAsMap();
            HighlightField highlightedTitle = hit.getHighlightFields().get(TITLE_FIELD);
            if (highlightedTitle != null) {
                // Join all fragments so the complete highlighted title replaces the plain one.
                StringBuilder joined = new StringBuilder();
                for (Text fragment : highlightedTitle.getFragments()) {
                    joined.append(fragment);
                }
                source.put(TITLE_FIELD, joined.toString());
            }
            results.add(source);
        }
        return results;
    }
}
controller
package com.ittao.Controller;
import com.ittao.service.ContentService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.CrossOrigin;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RestController;
import java.io.IOException;
import java.util.List;
import java.util.Map;
/**
 * REST endpoints for loading products into Elasticsearch and searching them.
 * Cross-origin requests are allowed for every endpoint via {@code @CrossOrigin}.
 */
@RestController
@CrossOrigin
public class ContentController {
/** Service that both endpoints delegate to. */
@Autowired
private ContentService contentService;
/**
 * Delegates to {@link ContentService#addToEs(String)} for the given keyword.
 *
 * @param keyword value of the {@code keyword} path segment
 * @return the boolean returned by the service
 * @throws IOException propagated from the service
 */
@GetMapping("/addToEs/{keyword}")
public boolean addToEs(@PathVariable("keyword") String keyword) throws IOException {
return contentService.addToEs(keyword);
}
/**
 * Delegates to {@link ContentService#searchPage(String, int, int)}.
 *
 * @param keyword  value of the {@code keyword} path segment
 * @param pageNo   value of the {@code pageNo} path segment
 * @param pageSize value of the {@code pageSize} path segment
 * @return the list of result maps returned by the service
 * @throws IOException propagated from the service
 */
@GetMapping("/searchPage/{keyword}/{pageNo}/{pageSize}")
public List<Map<String, Object>> searchPage(@PathVariable("keyword") String keyword,
@PathVariable("pageNo") int pageNo,
@PathVariable("pageSize") int pageSize) throws IOException {
return contentService.searchPage(keyword, pageNo, pageSize);
}
}
utils
package com.ittao.utils;
import com.ittao.entity.Content;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
/**
 * Scrapes product listings from the JD.com search result page.
 */
public class HtmlParseUtil {

    /** Timeout, in milliseconds, for fetching and parsing the search page. */
    private static final int TIMEOUT_MILLIS = 30000;

    /**
     * Fetches the JD search page for {@code keyword} and builds one {@link Content} for every
     * {@code <li>} found inside the element whose id is {@code J_goodsList}.
     *
     * @param keyword search term, must not be {@code null}; it is URL-encoded as UTF-8 before
     *                being placed in the query string
     * @return the parsed products; an empty list when the page has no {@code J_goodsList} element
     * @throws IOException if the page cannot be fetched or parsed
     */
    public static List<Content> parseJd(String keyword) throws IOException {
        // Encode the keyword so spaces, '&', '#', '+' or non-ASCII text cannot corrupt the query string.
        // The fully qualified name avoids needing a new import in this file.
        String encodedKeyword = java.net.URLEncoder.encode(keyword, "UTF-8");
        String url = "https://search.jd.com/Search?keyword=" + encodedKeyword + "&enc=utf-8";
        Document document = Jsoup.parse(new URL(url), TIMEOUT_MILLIS);

        List<Content> contents = new ArrayList<>();
        Element goodsList = document.getElementById("J_goodsList");
        if (goodsList == null) {
            // No result container in the returned page: report "no products" instead of failing with an NPE.
            return contents;
        }

        for (Element item : goodsList.getElementsByTag("li")) {
            // NOTE(review): if the site lazy-loads images, "src" may be empty here - confirm against the live page.
            String img = item.getElementsByTag("img").eq(0).attr("src");
            String price = item.getElementsByClass("p-price").text();
            String title = item.getElementsByClass("p-name").eq(0).text();

            Content content = new Content();
            content.setTitle(title);
            content.setImg(img);
            content.setPrice(price);
            contents.add(content);
        }
        return contents;
    }
}
前端实现
首页
<template>
  <div>
    <!-- Logo that links to jd.com -->
    <div class="logo">
      <el-link href="https://www.jd.com/" target="_blank">
        <el-image :src="logosrc" class="imge"></el-image>
      </el-link>
    </div>
    <!-- Feature overview and navigation to the two feature pages -->
    <div class="link">
      <h1>ElasticSearch的简单实战</h1>
      <p>第一个功能:从京东商城中爬取我们搜索的数据,存放到elasticsearch中</p>
      <p>第二个功能:从elasticsearch中根据关键字查询我们的数据,进行展示</p>
      <router-link to="/search"><p class="text">点我去搜索数据</p></router-link>
      <router-link to="/generateData"><p class="text">点我去爬取数据</p></router-link>
    </div>
  </div>
</template>
<script>
import Logosrc from "../assets/img/logo.png";

// Landing page: static content only, so the component holds nothing but the logo image.
export default {
  name: "Home",
  data() {
    return {
      logosrc: Logosrc
    };
  },
  components: {},
  created() {},
  methods: {}
};
</script>
<style scoped>
.text {
  font-size: 20px;
}
.link {
  text-align: left;
  margin-left: 450px;
}
.logo {
  height: 200px;
}
</style>
查询页面
<template>
  <div>
    <el-container>
      <el-header>
        <el-row>
          <el-row class="head">
            <!-- Logo image -->
            <el-col :span="8">
              <div class="logo">
                <el-link href="https://www.jd.com/" target="_blank">
                  <el-image :src="logosrc" class="imge"></el-image>
                </el-link>
              </div>
            </el-col>
            <!-- Search box -->
            <el-col :span="4">
              <div class="input">
                <el-input placeholder="请输入搜索内容" v-model="input" clearable></el-input>
              </div>
            </el-col>
            <el-col :span="1">
              <div class="input">
                <el-button type="danger" @click="searchData">搜索</el-button>
              </div>
            </el-col>
            <!-- Links to the other pages -->
            <el-col :span="4" :offset="6">
              <div class="input">
                <el-link href="/generateData">点我去爬取数据</el-link>
                <el-link href="/">点我去首页</el-link>
              </div>
            </el-col>
          </el-row>
        </el-row>
      </el-header>
      <!-- Result grid -->
      <div class="content">
        <div class="row">
          <ul>
            <li v-for="(item,index) in dataList" :key="index">
              <div class="col">
                <div class="image">
                  <img height="220px" :src="item.img" />
                </div>
                <div class="p-price">
                  <strong>
                    <i>{{item.price}}</i>
                  </strong>
                </div>
                <div class="p-title">
                  <!-- The title is rendered as HTML so the highlight markup returned by the backend shows up. -->
                  <!-- NOTE(review): v-html renders whatever the backend returns; if titles can contain untrusted markup, sanitize them first. -->
                  <p class="p-title1" v-html="item.title"></p>
                </div>
                <div class="p-commit">
                  <strong>
                    <a target="_blank" href="https://www.jd.com/">1300+</a>条评价
                  </strong>
                </div>
                <div class="p-shop">
                  <a target="_blank" class="curr-shop hd-shopname" href="https://www.jd.com/" title="文轩网旗舰店">文轩网旗舰店</a>
                </div>
              </div>
            </li>
          </ul>
        </div>
      </div>
    </el-container>
  </div>
</template>
<script>
import Logosrc from "../assets/img/logo.png";

// Every search asks for the first page of 20 hits.
const PAGE_NO = 1;
const PAGE_SIZE = 20;

export default {
  name: "Search",
  data() {
    return {
      logosrc: Logosrc,
      input: "",
      dataList: []
    };
  },
  components: {},
  created() {},
  methods: {
    /**
     * Queries the backend for the entered keyword and shows the returned hits.
     * Blank input is rejected locally because it would produce a path with an empty segment.
     */
    searchData() {
      const keyword = (this.input || "").trim();
      if (!keyword) {
        alert("请输入搜索内容");
        return;
      }
      // The keyword becomes a path segment, so it must be encoded ('/', '?', '#', spaces ...).
      const path = `/searchPage/${encodeURIComponent(keyword)}/${PAGE_NO}/${PAGE_SIZE}`;
      this.$http
        .get(path)
        .then(res => {
          this.dataList = Array.isArray(res.data) ? res.data : [];
          if (this.dataList.length < 1) {
            alert("暂无数据请重新搜索或者去生成数据!");
          }
        })
        .catch(() => {
          // Without this handler a failed request was an unhandled rejection and the user saw nothing.
          this.dataList = [];
          alert("搜索失败,请稍后重试!");
        });
    }
  }
};
</script>
<style scoped>
.el-header,
.el-footer {
  text-align: center;
  line-height: 80px;
}
.el-main {
  text-align: center;
  line-height: 800px;
}
body > .el-container {
  margin-bottom: 40px;
}
.el-container:nth-child(5) .el-aside,
.el-container:nth-child(6) .el-aside {
  line-height: 260px;
}
.el-container:nth-child(7) .el-aside {
  line-height: 320px;
}
.content {
  /* border: 1px solid sandybrown; */
  width: 100%;
  height: 1200px;
  margin: 50px auto;
}
.row {
  /* border: 1px solid saddlebrown; */
  width: 100%;
  height: 400px;
  float: left;
}
.col {
  /* border: 1px solid tan; */
  width: 25%;
  height: 400px;
  float: left;
}
.image {
  text-align: left;
  margin-left: 20px;
}
.p-price {
  text-align: left;
  margin-left: 20px;
  color: red;
}
.p-title1 {
  font-size: 10px;
}
.p-commit {
  text-align: left;
}
.p-shop {
  text-align: left;
}
ul li {
  list-style-type: none;
}
</style>
生成数据页面
<template>
  <div>
    <el-container>
      <el-header>
        <el-row>
          <el-row class="head">
            <!-- Logo image -->
            <el-col :span="8">
              <div class="logo">
                <el-link href="https://www.jd.com/" target="_blank">
                  <el-image :src="logosrc" class="imge"></el-image>
                </el-link>
              </div>
            </el-col>
            <!-- Keyword box -->
            <el-col :span="4">
              <div class="input">
                <el-input placeholder="请输入需要生成数据内容" v-model="input" clearable></el-input>
              </div>
            </el-col>
            <el-col :span="1">
              <div class="input">
                <el-button type="danger" @click="generateData">生成</el-button>
              </div>
            </el-col>
            <!-- Links to the other pages -->
            <el-col :span="4" :offset="6">
              <div class="input">
                <el-link href="/search">点我去查询</el-link>
                <el-link href="/">点我去首页</el-link>
              </div>
            </el-col>
          </el-row>
        </el-row>
      </el-header>
    </el-container>
  </div>
</template>
<script>
import Logosrc from "../assets/img/logo.png";

export default {
  name: "GenerateData",
  data() {
    return {
      input: "",
      logosrc: Logosrc
    };
  },
  components: {},
  created() {},
  methods: {
    /**
     * Asks the backend to scrape and index products for the entered keyword,
     * then reports success or failure to the user.
     */
    generateData() {
      const keyword = (this.input || "").trim();
      if (!keyword) {
        this.$message({
          type: "warning",
          message: "请输入需要生成数据内容"
        });
        return;
      }
      // The keyword becomes a path segment, so it must be encoded ('/', '?', '#', spaces ...).
      this.$http
        .get(`/addToEs/${encodeURIComponent(keyword)}`)
        .then(res => {
          if (res.data === true) {
            this.$message({
              type: "success",
              message: "生成数据成功,你可以去查询啦!"
            });
          } else {
            // The backend answers false when nothing was indexed; previously this was silent.
            this.$message({
              type: "error",
              message: "生成数据失败,请换个关键字重试!"
            });
          }
        })
        .catch(() => {
          this.$message({
            type: "error",
            message: "生成数据失败,请稍后重试!"
          });
        });
    }
  }
};
</script>
<style scoped>
.el-header,
.el-footer {
  text-align: center;
  line-height: 80px;
}
.el-main {
  text-align: center;
  line-height: 800px;
}
body > .el-container {
  margin-bottom: 40px;
}
.el-container:nth-child(5) .el-aside,
.el-container:nth-child(6) .el-aside {
  line-height: 260px;
}
.el-container:nth-child(7) .el-aside {
  line-height: 320px;
}
</style>
route中index.js
import Vue from 'vue'
import VueRouter from 'vue-router'
import Search from '../views/Search.vue'
import Home from '../views/Home.vue'
import GenerateData from '../views/GenerateData.vue'

// The plugin has to be installed before a router instance is created.
Vue.use(VueRouter)

// One entry per page of the application.
const routes = [
  { path: '/', name: 'Home', component: Home },
  { path: '/search', name: 'Search', component: Search },
  { path: '/generateData', name: 'GenerateData', component: GenerateData }
]

// History mode gives plain paths without a hash fragment.
export default new VueRouter({
  mode: 'history',
  base: process.env.BASE_URL,
  routes
})
main.js
import Vue from 'vue'
import App from './App.vue'
import router from './router'
import store from './store'
import ElementUI from 'element-ui'
import 'element-ui/lib/theme-chalk/index.css'
import axios from 'axios'

// Send every request made through this.$http to the backend.
axios.defaults.baseURL = 'http://localhost:8989'
Vue.prototype.$http = axios

Vue.config.productionTip = false
Vue.use(ElementUI)

// Create the root instance and attach it to the #app element.
const app = new Vue({
  router,
  store,
  render: createElement => createElement(App)
})
app.$mount('#app')
3.总结
通过学习,算是初步掌握了es的基本使用。学习es的步骤如下:
1.es的安装,尤其通过docker安装
2.es的简单restful api的使用,包括简单查询和复杂查询,通过kibana可视化界面操作
3.es的java客户端工具api学习,通过java语句去实现增删改查,其实本质上和查询语句类似,该有的方法都有
4.最后通过es仿京东搜索的实战练习,达到了对es有基本的运用能力