Elasticsearch之新闻案例实战
Posted 爱上口袋的天空
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Elasticsearch之新闻案例实战相关的知识,希望对你有一定的参考价值。
1、创建数据库表且预置数据
1.1、表结构如下
-- Recreate the `news` table from scratch (any existing table and its rows are dropped).
DROP TABLE IF EXISTS `news`;

CREATE TABLE `news` (
  `id`      int(11)       NOT NULL AUTO_INCREMENT,
  `title`   varchar(255)  NOT NULL     COMMENT '主题',
  `url`     varchar(255)  DEFAULT NULL COMMENT '连接',
  `content` text                       COMMENT '内容',
  -- Search keywords for the row, stored as a single string.
  `tags`    varchar(1000) DEFAULT NULL COMMENT '搜索的关键字',
  PRIMARY KEY (`id`) USING BTREE
)
  ENGINE = InnoDB
  AUTO_INCREMENT = 92
  DEFAULT CHARSET = utf8
  ROW_FORMAT = DYNAMIC;
1.2、数据如下
2、定义分词器以及属性类型
# Create the "news" index: a pinyin-based analyzer for tags plus the field mappings.
PUT news
{
  "settings": {
    "analysis": {
      "analyzer": {
        "news_tags_analyzer": {
          "char_filter": ["html_strip"],
          "tokenizer": "keyword",
          "filter": "news_tags_filter"
        }
      },
      "filter": {
        "news_tags_filter": {
          "type": "pinyin",
          "keep_full_pinyin": true,
          "keep_joined_full_pinyin": true,
          "keep_original": true
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "id": {
        "type": "long"
      },
      "title": {
        "type": "text",
        "analyzer": "ik_max_word",
        "search_analyzer": "ik_smart"
      },
      "content": {
        "type": "text",
        "analyzer": "ik_max_word",
        "search_analyzer": "ik_smart"
      },
      "url": {
        "type": "keyword"
      },
      "tags": {
        "type": "completion",
        "analyzer": "news_tags_analyzer",
        "search_analyzer": "keyword"
      }
    }
  }
}
3、将mysql数据导入es
3.1、创建logstash-mysql-news.conf文件
# Pipeline: read rows from the MySQL `news` table and index them into the
# Elasticsearch `news` index, using the row id as the document id.

# input: pull the rows into Logstash through JDBC.
input {
  jdbc {
    jdbc_driver_library => "/opt/es781/mysql/mysql-connector-java-5.1.49.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    # Connector/J option names are `useUnicode` and `serverTimezone`; misspelled
    # names are not recognised, so the intended settings would not take effect.
    jdbc_connection_string => "jdbc:mysql://192.168.1.13:3306/oss?useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC"
    jdbc_user => "root"
    # NOTE(review): credential is hard-coded; consider the Logstash keystore or an
    # environment variable outside of a demo setup.
    jdbc_password => "897570"
    # Fetch the result set page by page.
    jdbc_paging_enabled => "true"
    # Number of rows per page.
    jdbc_page_size => "20"
    # Only rows that actually have keywords are imported.
    statement => "SELECT * FROM news where tags is not null"
  }
}

# filter: reshape each event before it is indexed.
filter {
  mutate {
    # Split the comma-separated keyword string into an array of keywords.
    split => { "tags" => "," }
  }
  # Logstash adds these two fields to every event; they are not wanted in the index.
  mutate {
    remove_field => ["@timestamp", "@version"]
  }
}

# output: write each event to Elasticsearch and echo it to the console.
output {
  elasticsearch {
    document_id => "%{id}"
    # NOTE(review): document_type is reported as deprecated in recent versions of the
    # elasticsearch output plugin - confirm whether it can be dropped.
    document_type => "_doc"
    index => "news"
    hosts => ["http://192.168.56.20:9200"]
  }
  stdout {
    codec => rubydebug
  }
}
3.2、将上面需要的mysql jar包上传到Linux服务器上
3.3、将logstash-mysql-news.conf文件上传到/opt/es781/logstash-7.8.1/目录下
3.4、在/opt/es781/logstash-7.8.1/目录下执行下面的命令,将数据从mysql中导入es:
命令:bin/logstash -f /opt/es781/logstash-7.8.1/logstash-mysql-news.conf
3.5、在kibana上查询news索引是否成功导入数据
4、根据需求编写kibana脚本
4.1、自动补全语句
# Autocomplete: completion suggestions on the "tags" field for the prefix "zh".
GET news/_search
{
  "_source": false,
  "suggest": {
    "news_tags_suggest": {
      "prefix": "zh",
      "completion": {
        "field": "tags",
        "size": 10,
        "skip_duplicates": true
      }
    }
  }
}
4.2、内容搜索
# Content search: match "title" and "content", wrapping matched terms in a highlight span.
GET news/_search
{
  "_source": false,
  "query": {
    "multi_match": {
      "query": "中国",
      "fields": ["title", "content"]
    }
  },
  "highlight": {
    "pre_tags": "<span class='highLight'>",
    "post_tags": "</span>",
    "fields": {
      "title": {},
      "content": {}
    }
  }
}
5、在java中使用代码实现上面两个搜索
5.1、创建news实体类
package com.kgf.es.model;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * Plain data holder for a single news item.
 *
 * <p>Lombok generates the accessors, {@code equals}/{@code hashCode}/{@code toString},
 * a no-argument constructor and an all-arguments constructor whose parameters follow
 * the field declaration order below.
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
public class News {

    /** Identifier of the news item. */
    private Integer id;

    /** Title of the news item. */
    private String title;

    /** Body text of the news item. */
    private String content;

    /** Link associated with the news item. */
    private String url;
}
5.2、创建NewsController
package com.kgf.es.controller;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.kgf.es.model.News;
import org.apache.http.util.EntityUtils;
import org.elasticsearch.client.Request;
import org.elasticsearch.client.Response;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;

import javax.annotation.Resource;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * REST endpoints backed by the Elasticsearch {@code news} index: keyword
 * auto-completion ({@code /news/tips}) and highlighted full-text search
 * ({@code /news/search}).
 *
 * <p>Both endpoints send a raw JSON body through the low-level REST client and
 * parse the JSON response with fastjson.
 */
@RestController
@RequestMapping("/news")
public class NewsController {

    /** Search endpoint of the news index, relative to the cluster root. */
    private static final String SEARCH_ENDPOINT = "news/_search";

    /** Name given to the suggester in the request and used to read it back from the response. */
    private static final String SUGGEST_NAME = "news_tags_suggest";

    @Resource
    private RestHighLevelClient restHighLevelClient;

    /**
     * Returns up to 10 distinct completion suggestions from the {@code tags} field.
     *
     * @param term prefix typed by the user; {@code null} or blank yields an empty list
     * @return a {@code List<String>} holding the {@code text} of each suggestion option
     * @throws IOException if the request to Elasticsearch fails
     */
    @GetMapping("/tips")
    public Object autoComplete(String term) throws IOException {
        List<String> results = new ArrayList<>();
        if (isBlank(term)) {
            // Nothing to complete; avoid sending the literal string "null" as a prefix.
            return results;
        }

        Request request = new Request("GET", SEARCH_ENDPOINT);
        // The user input is serialized as a JSON string literal (quotes included), so
        // characters such as '"' or '\' cannot break out of the request body.
        request.setJsonEntity(String.format("{"
                + "  \"_source\": false,"
                + "  \"suggest\": {"
                + "    \"" + SUGGEST_NAME + "\": {"
                + "      \"prefix\": %s,"
                + "      \"completion\": {"
                + "        \"field\": \"tags\","
                + "        \"size\": 10,"
                + "        \"skip_duplicates\": true"
                + "      }"
                + "    }"
                + "  }"
                + "}", JSONObject.toJSONString(term)));

        Response response = restHighLevelClient.getLowLevelClient().performRequest(request);
        JSONObject root = JSONObject.parseObject(EntityUtils.toString(response.getEntity()));

        JSONObject suggest = root.getJSONObject("suggest");
        JSONArray entries = suggest == null ? null : suggest.getJSONArray(SUGGEST_NAME);
        if (entries == null || entries.isEmpty()) {
            return results;
        }
        JSONArray options = entries.getJSONObject(0).getJSONArray("options");
        if (options == null) {
            return results;
        }
        for (int i = 0; i < options.size(); i++) {
            results.add(options.getJSONObject(i).getString("text"));
        }
        return results;
    }

    /**
     * Searches {@code title} and {@code content} and returns the hits with matched
     * terms wrapped in {@code <span class='highLight'>…</span>}.
     *
     * <p>Elasticsearch returns highlights as an array of fragments per field; the
     * fragments are concatenated. When a field has no highlight for a hit, the
     * original value from {@code _source} is used instead.
     *
     * @param text search text; {@code null} or blank yields an empty list
     * @return one {@link News} per hit with url, title and content populated
     * @throws Exception if the request to Elasticsearch fails
     */
    @GetMapping("/search")
    public List<News> query(String text) throws Exception {
        List<News> results = new ArrayList<>();
        if (isBlank(text)) {
            return results;
        }

        Request request = new Request("GET", SEARCH_ENDPOINT);
        // Same escaping as in autoComplete: %s receives a complete JSON string literal.
        request.setJsonEntity(String.format("{"
                + "  \"_source\": [\"url\", \"title\", \"content\"],"
                + "  \"query\": {"
                + "    \"multi_match\": {"
                + "      \"query\": %s,"
                + "      \"fields\": [\"title\", \"content\"]"
                + "    }"
                + "  },"
                + "  \"highlight\": {"
                + "    \"pre_tags\": \"<span class='highLight'>\","
                + "    \"post_tags\": \"</span>\","
                + "    \"fields\": {"
                + "      \"title\": {},"
                + "      \"content\": {}"
                + "    }"
                + "  }"
                + "}", JSONObject.toJSONString(text)));

        Response response = restHighLevelClient.getLowLevelClient().performRequest(request);
        JSONObject root = JSONObject.parseObject(EntityUtils.toString(response.getEntity()));
        JSONArray hits = root.getJSONObject("hits").getJSONArray("hits");

        for (int i = 0; i < hits.size(); i++) {
            JSONObject hit = hits.getJSONObject(i);
            JSONObject highlight = hit.getJSONObject("highlight"); // highlighted fragments, may be absent
            JSONObject source = hit.getJSONObject("_source");      // original field values

            News news = new News();
            news.setUrl(source == null ? null : source.getString("url"));
            news.setTitle(highlightedOrOriginal(highlight, source, "title"));
            news.setContent(highlightedOrOriginal(highlight, source, "content"));
            results.add(news);
        }
        return results;
    }

    /**
     * Joins the highlight fragments of {@code field}, or falls back to the original
     * value in {@code source} when the hit carries no highlight for that field.
     */
    private static String highlightedOrOriginal(JSONObject highlight, JSONObject source, String field) {
        JSONArray fragments = highlight == null ? null : highlight.getJSONArray(field);
        if (fragments == null) {
            return source == null ? null : source.getString(field);
        }
        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < fragments.size(); i++) {
            joined.append(fragments.getString(i));
        }
        return joined.toString();
    }

    /** Returns true when the value is null or contains only whitespace. */
    private static boolean isBlank(String value) {
        return value == null || value.trim().isEmpty();
    }
}
6、测试
6.1、测试tips,完成对tags的关键字检索
6.2、测试内容检索
以上是关于Elasticsearch之新闻案例实战的主要内容,如果未能解决你的问题,请参考以下文章