使用Spring Data ElasticSearch+Jsoup操作集群数据存储
Posted ixfcao
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了使用Spring Data ElasticSearch+Jsoup操作集群数据存储相关的知识,希望对你有一定的参考价值。
使用Spring Data ElasticSearch+Jsoup操作集群数据存储
1、使用Jsoup爬取京东商城的商品数据
1)获取商品名称、价格以及商品地址,并封装为一个Product对象,代码截图:
2)创建Product实体类,完成对索引、类型、映射以及文档的配置,代码截图:
3)将爬取到的商品对象存储到集群中,代码截图:
4)完成对商品信息的查询、分页、删除和更新操作,代码截图:
applicationContext.xml
<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xmlns:context="http://www.springframework.org/schema/context"
       xmlns:elasticsearch="http://www.springframework.org/schema/data/elasticsearch"
       xsi:schemaLocation="http://www.springframework.org/schema/beans
           http://www.springframework.org/schema/beans/spring-beans.xsd
           http://www.springframework.org/schema/context
           http://www.springframework.org/schema/context/spring-context.xsd
           http://www.springframework.org/schema/data/elasticsearch
           http://www.springframework.org/schema/data/elasticsearch/spring-elasticsearch.xsd">

    <!-- Enable component scanning for the application's packages. -->
    <context:component-scan base-package="com.elasticsearch"/>

    <!--
        Cluster connection settings.
        The node list is kept on a single line with no whitespace so that each
        host:port entry is passed through exactly as written.
    -->
    <elasticsearch:transport-client id="esClient"
                                    cluster-name="my-cluster"
                                    cluster-nodes="127.0.0.1:9301,127.0.0.1:9302,127.0.0.1:9303"/>

    <!-- ElasticsearchTemplate built on top of the transport client declared above. -->
    <bean id="elasticsearchTemplate"
          class="org.springframework.data.elasticsearch.core.ElasticsearchTemplate">
        <constructor-arg name="client" ref="esClient"/>
    </bean>

    <!--
        Scan the repository interfaces (used here the way MyBatis mappers are used,
        i.e. to operate on the data directly). Each XxxMapper in this package serves
        one entity, and that entity corresponds to a type (table-like) in Elasticsearch.
    -->
    <elasticsearch:repositories base-package="com.elasticsearch.mapper"/>
</beans>
pom.xml
<?xml version="1.0" encoding="UTF-8"?>

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.elasticsearch</groupId>
  <artifactId>eshm0430</artifactId>
  <version>1.0-SNAPSHOT</version>

  <name>eshm0430</name>
  <!-- FIXME change it to the project's website -->
  <url>http://www.example.com</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <!-- Aligned with the maven-compiler-plugin configuration below (Java 8);
         the service layer uses java.util.Optional, which needs Java 8. -->
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
  </properties>

  <dependencies>
    <!--
        The original listing excluded "transport-netty4-client" here, but the
        artifactId was typed with U+2010 hyphens and therefore matched nothing.
        The exclusion is dropped rather than corrected: no explicit
        transport-netty4-client dependency is declared in this POM, so a working
        exclusion would remove the module the transport client relies on.
    -->
    <dependency>
      <groupId>org.springframework.data</groupId>
      <artifactId>spring-data-elasticsearch</artifactId>
      <version>3.1.9.RELEASE</version>
    </dependency>

    <!--
        junit and spring-test were each declared twice (4.11/test + 4.12/compile and
        5.1.5.RELEASE/test + 5.2.5.RELEASE/compile). Maven warns about duplicate
        declarations and uses the later one, so only the later declarations are kept.
        NOTE(review): compile scope is retained because the location of the test class
        (src/main vs src/test) is not visible here; switch to test scope if it lives
        under src/test.
    -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.12</version>
      <scope>compile</scope>
    </dependency>
    <dependency>
      <groupId>org.springframework</groupId>
      <artifactId>spring-test</artifactId>
      <version>5.2.5.RELEASE</version>
      <scope>compile</scope>
    </dependency>

    <!-- jsoup: HTML fetching/parsing used by the crawler -->
    <dependency>
      <groupId>org.jsoup</groupId>
      <artifactId>jsoup</artifactId>
      <version>1.11.3</version>
    </dependency>
  </dependencies>

  <build>
    <pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
      <plugins>
        <!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
        <plugin>
          <artifactId>maven-clean-plugin</artifactId>
          <version>3.1.0</version>
        </plugin>
        <!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
        <plugin>
          <artifactId>maven-resources-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-compiler-plugin</artifactId>
          <version>3.8.0</version>
        </plugin>
        <plugin>
          <artifactId>maven-surefire-plugin</artifactId>
          <version>2.22.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-jar-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-install-plugin</artifactId>
          <version>2.5.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-deploy-plugin</artifactId>
          <version>2.8.2</version>
        </plugin>
        <!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
        <plugin>
          <artifactId>maven-site-plugin</artifactId>
          <version>3.7.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-project-info-reports-plugin</artifactId>
          <version>3.0.0</version>
        </plugin>
      </plugins>
    </pluginManagement>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <configuration>
          <source>8</source>
          <target>8</target>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>
Product
1 package com.elasticsearch.entity; 2 import org.springframework.data.annotation.Id; 3 import org.springframework.data.elasticsearch.annotations.Document; 4 import org.springframework.data.elasticsearch.annotations.Field; 5 import org.springframework.data.elasticsearch.annotations.FieldType; 6 7 @Document(indexName = "my-index3", type = "Product") 8 public class Product { 9 @Id 10 11 @Field(type = FieldType.Long,index = false,store = true) 12 private Long id; 13 14 @Field(type = FieldType.Text,index = true,store = true,analyzer = "ik_max_word") 15 private String pname; 16 17 @Field(type = FieldType.Text,index = true,store = true,analyzer = "ik_max_word") 18 private String pprice; 19 20 @Field(type = FieldType.Text,index = true,store = true,analyzer = "ik_max_word") 21 private String padress; 22 23 public Long getId() { 24 return id; 25 } 26 27 public void setId(Long id) { 28 this.id = id; 29 } 30 31 public String getPname() { 32 return pname; 33 } 34 35 public void setPname(String pname) { 36 this.pname = pname; 37 } 38 39 public String getPprice() { 40 return pprice; 41 } 42 43 public void setPprice(String pprice) { 44 this.pprice = pprice; 45 } 46 47 public String getPadress() { 48 return padress; 49 } 50 51 public void setPadress(String padress) { 52 this.padress = padress; 53 } 54 55 @Override 56 public String toString() { 57 return "Product{" + 58 "id=" + id + 59 ", pname=\'" + pname + \'\\\'\' + 60 ", pprice=\'" + pprice + \'\\\'\' + 61 ", padress=\'" + padress + \'\\\'\' + 62 \'}\'; 63 } 64 }
ProductMapper
package com.elasticsearch.mapper;

import com.elasticsearch.entity.Product;
import org.springframework.data.domain.Pageable;
import org.springframework.data.elasticsearch.repository.ElasticsearchCrudRepository;
import org.springframework.stereotype.Repository;

import java.util.List;

/**
 * Repository for {@link Product} documents, keyed by {@code Long} id.
 *
 * <p>The CRUD operations come from {@link ElasticsearchCrudRepository}; this interface
 * only adds the name-based paged query.
 */
@Repository
public interface ProductMapper extends ElasticsearchCrudRepository<Product, Long> {

    /**
     * Finds products by name, one page at a time.
     *
     * @param pname    the product name to search for
     * @param pageable the page to return
     * @return the products on the requested page
     */
    List<Product> findByPname(String pname, Pageable pageable);

}
ProductService
package com.elasticsearch.service;

import com.elasticsearch.entity.Product;
import org.springframework.data.domain.Pageable;

import java.util.List;
import java.util.Optional;

/**
 * Service-level operations on {@link Product} documents: create, look up,
 * delete, update and paged search by name.
 */
public interface ProductService {

    /**
     * Adds a new product document.
     *
     * @param product the product to store
     */
    void save(Product product);

    /**
     * Looks up a product by its id.
     *
     * @param id the document id
     * @return the product, or an empty {@code Optional} if none was found
     */
    Optional<Product> findById(Long id);

    /**
     * Deletes the product with the given id.
     *
     * @param id the document id
     */
    void deleteById(Long id);

    /**
     * Updates the document identified by the product's id.
     *
     * @param product the product carrying the id and the new field values
     */
    void updateById(Product product);

    /**
     * Finds products by name, one page at a time.
     *
     * @param pname    the product name to search for
     * @param pageable the page to return
     * @return the products on the requested page
     */
    List<Product> findByPname(String pname, Pageable pageable);

}
ProductServiceImp
package com.elasticsearch.service.Imp;

import com.elasticsearch.entity.Product;
import com.elasticsearch.mapper.ProductMapper;
import com.elasticsearch.service.ProductService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.domain.Pageable;
import org.springframework.stereotype.Service;

import java.util.List;
import java.util.Optional;

/**
 * {@link ProductService} implementation that delegates every operation to
 * {@link ProductMapper}. Registered under the bean name {@code "ProductService"}.
 */
@Service("ProductService")
public class ProductServiceImp implements ProductService {

    @Autowired
    private ProductMapper productMapper;

    /**
     * Stores the product through the repository.
     *
     * @param product the product to store
     */
    @Override
    public void save(Product product) {
        productMapper.save(product);
    }

    /**
     * Looks up a product by id through the repository.
     *
     * @param id the document id
     * @return the repository's lookup result
     */
    @Override
    public Optional<Product> findById(Long id) {
        return productMapper.findById(id);
    }

    /**
     * Deletes the product with the given id through the repository.
     *
     * @param id the document id
     */
    @Override
    public void deleteById(Long id) {
        productMapper.deleteById(id);
    }

    /**
     * Updates a product by writing it through the repository's {@code save}.
     *
     * @param product the product carrying the id and the new field values
     * @throws IllegalArgumentException if {@code product} or its id is {@code null}
     */
    @Override
    public void updateById(Product product) {
        // An update needs an id to target; without one the write below would not
        // be addressing any existing document.
        if (product == null || product.getId() == null) {
            throw new IllegalArgumentException("updateById requires a product with a non-null id");
        }
        productMapper.save(product);
    }

    /**
     * Finds products by name, one page at a time, through the repository.
     *
     * @param pname    the product name to search for
     * @param pageable the page to return
     * @return the products on the requested page
     */
    @Override
    public List<Product> findByPname(String pname, Pageable pageable) {
        return productMapper.findByPname(pname, pageable);
    }
}
SpringDataESTest
1 package com.elasticsearch; 2 import com.elasticsearch.entity.Product; 3 import com.elasticsearch.service.ProductService; 4 import org.jsoup.Jsoup; 5 import org.jsoup.nodes.Document; 6 import org.jsoup.nodes.Element; 7 import org.jsoup.select.Elements; 8 import org.junit.Test; 9 import org.junit.runner.RunWith; 10 import org.springframework.beans.factory.annotation.Autowired; 11 import org.springframework.data.domain.PageRequest; 12 import org.springframework.data.elasticsearch.core.ElasticsearchTemplate; 13 import org.springframework.test.context.ContextConfiguration; 14以上是关于使用Spring Data ElasticSearch+Jsoup操作集群数据存储的主要内容,如果未能解决你的问题,请参考以下文章 是否可以在 Spring(非 Boot)上使用 Testcontainers?
elasticsear+kibana+logstash 优化
无法将 Spring Data MongoDB + Spring Data JPA 与 Spring Boot 一起使用