使用Spring Data ElasticSearch+Jsoup操作集群数据存储

Posted ixfcao

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了使用Spring Data ElasticSearch+Jsoup操作集群数据存储相关的知识,希望对你有一定的参考价值。

使用Spring Data ElasticSearch+Jsoup操作集群数据存储

1、使用Jsoup爬取京东商城的商品数据

1)获取商品名称、价格以及商品地址,并封装为一个Product对象,代码截图:

2)创建Product实体类,完成对索引、类型、映射以及文档的配置,代码截图:

3)将爬取到的商品对象存储到集群中,代码截图:

4)完成对商品信息的查询、分页、删除和更新操作,代码截图:

 

applicationContext.xml

 1 <?xml version="1.0" encoding="UTF-8"?>
 2 <beans xmlns="http://www.springframework.org/schema/beans"
 3        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:context="http://www.springframework.org/schema/context"
 4        xmlns:elasticsearch="http://www.springframework.org/schema/data/elasticsearch"
 5        xsi:schemaLocation="http://www.springframework.org/schema/beans
 6        http://www.springframework.org/schema/beans/spring-beans.xsd http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context.xsd  http://www.springframework.org/schema/data/elasticsearch http://www.springframework.org/schema/data/elasticsearch/spring-elasticsearch.xsd">
 7     <!-- Enable component scanning under the com.elasticsearch package -->
 8     <context:component-scan base-package="com.elasticsearch"/>
 9     <!-- Transport client for the cluster named "my-cluster", pointing at three nodes on 127.0.0.1. NOTE(review): the cluster-nodes value wraps onto the next line; confirm the embedded whitespace is tolerated when the node list is parsed -->
10     <elasticsearch:transport-client id="esClient" cluster-name="my-cluster" cluster-nodes="127.0.0.1:9301,
11                     127.0.0.1:9302,127.0.0.1:9303"/>
12     <!-- ElasticsearchTemplate bean, constructed with the esClient transport client declared above -->
13     <bean id="elasticsearchTemplate" class="org.springframework.data.elasticsearch.core.ElasticsearchTemplate">
14         <constructor-arg name="client" ref="esClient"/>
15     </bean>
16     <!-- Scan com.elasticsearch.mapper for repository interfaces (the author compares them to MyBatis mappers: each XxxMapper works on entity Xxx, which corresponds to an ES type, i.e. a table) -->
17     <elasticsearch:repositories base-package="com.elasticsearch.mapper"/>
18 </beans>

pom.xml

  1 <?xml version="1.0" encoding="UTF-8"?>
  2 
  3 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  4   xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  5   <modelVersion>4.0.0</modelVersion>
  6 
  7   <groupId>com.elasticsearch</groupId>
  8   <artifactId>eshm0430</artifactId>
  9   <version>1.0-SNAPSHOT</version>
 10 
 11   <name>eshm0430</name>
 12   <!-- FIXME change it to the project's website -->
 13   <url>http://www.example.com</url>
 14 
 15   <properties>
 16     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
 17     <maven.compiler.source>1.7</maven.compiler.source>
 18     <maven.compiler.target>1.7</maven.compiler.target>
 19   </properties>
 20 
 21   <dependencies>
 22     <dependency>
 23       <groupId>junit</groupId>
 24       <artifactId>junit</artifactId>
 25       <version>4.11</version>
 26       <scope>test</scope>
 27     </dependency>
 28     <!-- NOTE(review): junit is declared twice in this section (4.11/test above, 4.12/compile below) and so is spring-test (5.1.5/test, 5.2.5/compile); confirm which declaration is intended and drop the other -->
 29     <!-- NOTE(review): the excluded artifactId below is spelled with non-ASCII hyphen characters instead of ASCII '-', so it presumably does not match transport-netty4-client; confirm whether the exclusion is really wanted before correcting the spelling -->
 30     <dependency>
 31       <groupId>org.springframework.data</groupId>
 32       <artifactId>spring-data-elasticsearch</artifactId>
 33       <version>3.1.9.RELEASE</version>
 34       <exclusions>
 35         <exclusion>
 36           <groupId>org.elasticsearch.plugin</groupId>
 37           <artifactId>transport‐netty4‐client</artifactId>
 38         </exclusion>
 39       </exclusions>
 40     </dependency>
 41 
 42     <dependency>
 43       <groupId>org.springframework</groupId>
 44       <artifactId>spring-test</artifactId>
 45       <version>5.1.5.RELEASE</version>
 46       <scope>test</scope>
 47     </dependency>
 48     <dependency>
 49       <groupId>junit</groupId>
 50       <artifactId>junit</artifactId>
 51       <version>4.12</version>
 52       <scope>compile</scope>
 53     </dependency>
 54     <dependency>
 55       <groupId>org.springframework</groupId>
 56       <artifactId>spring-test</artifactId>
 57       <version>5.2.5.RELEASE</version>
 58       <scope>compile</scope>
 59     </dependency>
 60     <!-- jsoup, the HTML parser used for the product crawl -->
 61     <dependency>
 62       <groupId>org.jsoup</groupId>
 63       <artifactId>jsoup</artifactId>
 64       <version>1.11.3</version>
 65     </dependency>
 66   </dependencies>
 67 
 68   <build>
 69     <pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
 70       <plugins>
 71         <!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
 72         <plugin>
 73           <artifactId>maven-clean-plugin</artifactId>
 74           <version>3.1.0</version>
 75         </plugin>
 76         <!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
 77         <plugin>
 78           <artifactId>maven-resources-plugin</artifactId>
 79           <version>3.0.2</version>
 80         </plugin>
 81         <plugin>
 82           <artifactId>maven-compiler-plugin</artifactId>
 83           <version>3.8.0</version>
 84         </plugin>
 85         <plugin>
 86           <artifactId>maven-surefire-plugin</artifactId>
 87           <version>2.22.1</version>
 88         </plugin>
 89         <plugin>
 90           <artifactId>maven-jar-plugin</artifactId>
 91           <version>3.0.2</version>
 92         </plugin>
 93         <plugin>
 94           <artifactId>maven-install-plugin</artifactId>
 95           <version>2.5.2</version>
 96         </plugin>
 97         <plugin>
 98           <artifactId>maven-deploy-plugin</artifactId>
 99           <version>2.8.2</version>
100         </plugin>
101         <!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
102         <plugin>
103           <artifactId>maven-site-plugin</artifactId>
104           <version>3.7.1</version>
105         </plugin>
106         <plugin>
107           <artifactId>maven-project-info-reports-plugin</artifactId>
108           <version>3.0.0</version>
109         </plugin>
110       </plugins>
111     </pluginManagement>
112     <plugins>
113       <plugin>
114         <groupId>org.apache.maven.plugins</groupId>
115         <artifactId>maven-compiler-plugin</artifactId>
116         <configuration>
117           <source>8</source>
118           <target>8</target>
119         </configuration>
120       </plugin>
121     </plugins>
122   </build>
123 </project>

Product

 1 package com.elasticsearch.entity;
 2 import org.springframework.data.annotation.Id;
 3 import org.springframework.data.elasticsearch.annotations.Document;
 4 import org.springframework.data.elasticsearch.annotations.Field;
 5 import org.springframework.data.elasticsearch.annotations.FieldType;
 6 
 7 /**
 8  * Elasticsearch document for a crawled product, mapped to index {@code my-index3}
 9  * under type {@code Product}.
10  *
11  * <p>The id is stored but not indexed; name, price and address are stored text
12  * fields analyzed with {@code ik_max_word}.
13  */
14 @Document(indexName = "my-index3", type = "Product")
15 public class Product {
16     // Document identifier: stored, not indexed.
17     @Id
18     @Field(type = FieldType.Long,index = false,store = true)
19     private Long id;
20 
21     // Product name.
22     @Field(type = FieldType.Text,index = true,store = true,analyzer = "ik_max_word")
23     private String pname;
24 
25     // Product price, held as a String and mapped as analyzed text.
26     @Field(type = FieldType.Text,index = true,store = true,analyzer = "ik_max_word")
27     private String pprice;
28 
29     // Product address (presumably the product page URL; TODO confirm against the crawler).
30     @Field(type = FieldType.Text,index = true,store = true,analyzer = "ik_max_word")
31     private String padress;
32 
33     public Long getId() {
34         return id;
35     }
36 
37     public void setId(Long id) {
38         this.id = id;
39     }
40 
41     public String getPname() {
42         return pname;
43     }
44 
45     public void setPname(String pname) {
46         this.pname = pname;
47     }
48 
49     public String getPprice() {
50         return pprice;
51     }
52 
53     public void setPprice(String pprice) {
54         this.pprice = pprice;
55     }
56 
57     public String getPadress() {
58         return padress;
59     }
60 
61     public void setPadress(String padress) {
62         this.padress = padress;
63     }
64 
65     // Renders all four fields as Product{id=..., pname='...', pprice='...', padress='...'}.
66     @Override
67     public String toString() {
68         return "Product{" +
69                 "id=" + id +
70                 ", pname='" + pname + '\'' +
71                 ", pprice='" + pprice + '\'' +
72                 ", padress='" + padress + '\'' +
73                 '}';
74     }
75 }

 

ProductMapper

 1 package com.elasticsearch.mapper;
 2 
 3 import com.elasticsearch.entity.Product;
 4 import org.springframework.data.domain.Pageable;
 5 import org.springframework.data.elasticsearch.repository.ElasticsearchCrudRepository;
 6 import org.springframework.stereotype.Repository;
 7 
 8 import java.util.List;
 9 
10 @Repository
11 public interface ProductMapper extends ElasticsearchCrudRepository <Product,Long> {
12     // Repository for Product documents keyed by a Long id; the only method declared here is the
13     // search below, everything else comes from ElasticsearchCrudRepository.
14     // Query by product name (pname) with paging; presumably derived by Spring Data from the method name.
15     List<Product> findByPname(String pname , Pageable pageable);
16 
17 }

ProductService

 1 package com.elasticsearch.service;
 2 
 3 import com.elasticsearch.entity.Product;
 4 import org.springframework.data.domain.Pageable;
 5 
 6 import java.util.List;
 7 import java.util.Optional;
 8 
 9 
10 public interface ProductService {
11     // Service-level operations on Product documents: save, lookup by id, delete, update and paged search by name.
12     // Adds (saves) a product document.
13     void save(Product product);
14 
15     // Looks up a product by its id; the result is wrapped in an Optional.
16     Optional<Product> findById(Long id);
17 
18     // Deletes the product with the given id.
19     void deleteById(Long id);
20 
21     // Updates a product document; note the argument is the whole Product, not just an id.
22     void updateById(Product product);
23 
24     // Queries by product name (pname) with paging controlled by pageable.
25     List<Product> findByPname(String pname ,Pageable pageable);
26 
27 
28 }

ProductServiceImp

 1 package com.elasticsearch.service.Imp;
 2 
 3 import com.elasticsearch.entity.Product;
 4 import com.elasticsearch.mapper.ProductMapper;
 5 import com.elasticsearch.service.ProductService;
 6 import org.springframework.beans.factory.annotation.Autowired;
 7 import org.springframework.data.domain.Pageable;
 8 import org.springframework.stereotype.Service;
 9 
10 import java.util.List;
11 import java.util.Optional;
12 
13 /**
14  * {@link ProductService} implementation that forwards every call to {@link ProductMapper}.
15  */
16 @Service("ProductService")
17 public class ProductServiceImp implements ProductService {
18     /** Repository injected by Spring; every operation below delegates to it. */
19     @Autowired
20     private ProductMapper productMapper;
21 
22     /** Saves the given product through the repository. */
23     @Override
24     public void save(Product product) {
25         this.productMapper.save(product);
26     }
27 
28     /** Returns whatever the repository finds for the given id. */
29     @Override
30     public Optional<Product> findById(Long id) {
31         final Optional<Product> found = this.productMapper.findById(id);
32         return found;
33     }
34 
35     /** Deletes the product with the given id through the repository. */
36     @Override
37     public void deleteById(Long id) {
38         this.productMapper.deleteById(id);
39     }
40 
41     /** Updates by saving the product again; this is the same repository call {@code save} makes. */
42     @Override
43     public void updateById(Product product) {
44         this.productMapper.save(product);
45     }
46 
47     /** Returns the repository's matches for {@code pname}, paged by {@code pageable}. */
48     @Override
49     public List<Product> findByPname(String pname, Pageable pageable) {
50         final List<Product> matches = this.productMapper.findByPname(pname, pageable);
51         return matches;
52     }
53 }

SpringDataESTest

  1 package com.elasticsearch;
  2 import com.elasticsearch.entity.Product;
  3 import com.elasticsearch.service.ProductService;
  4 import org.jsoup.Jsoup;
  5 import org.jsoup.nodes.Document;
  6 import org.jsoup.nodes.Element;
  7 import org.jsoup.select.Elements;
  8 import org.junit.Test;
  9 import org.junit.runner.RunWith;
 10 import org.springframework.beans.factory.annotation.Autowired;
 11 import org.springframework.data.domain.PageRequest;
 12 import org.springframework.data.elasticsearch.core.ElasticsearchTemplate;
 13 import org.springframework.test.context.ContextConfiguration;
 14 // (listing truncated in the source page)

以上是关于使用Spring Data ElasticSearch+Jsoup操作集群数据存储的主要内容,如果未能解决你的问题,请参考以下文章:

是否可以在 Spring(非 Boot)上使用 Testcontainers?

elasticsearch+kibana+logstash 优化

spark 怎么去连接 ElasticSearch

无法将 Spring Data MongoDB + Spring Data JPA 与 Spring Boot 一起使用

使用 Spring data mongo 和 Spring data elasticsearch 时如何建模?

Spring Boot(17)——使用Spring Data JPA