jsoup实现网络爬虫并且以地图形式展现

Posted yizhixiaozhu

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了jsoup实现网络爬虫并且以地图形式展现相关的知识,希望对你有一定的参考价值。

本项目用到了jsoup和echarts,下面先展示项目的目录结构:

 

 

 

接下来展示各个文件内容:

Add文件内容:

package test;

import java.sql.Connection;
import java.sql.SQLException;
import java.sql.Statement;

/**
 * Inserts one scraped record into a database table.
 */
public class Add {

    /**
     * Inserts the values held by {@code en} into {@code table}.
     *
     * <p>All column values are bound through a prepared statement, so a value
     * containing a quote can neither break nor alter the SQL. The table name
     * cannot be bound as a parameter, so it is only accepted when it is a
     * plain identifier (letters, digits and underscores).
     *
     * @param table name of the target table
     * @param en    record whose getters supply the six column values
     * @return {@code true} if at least one row was inserted; {@code false} if
     *         the table name is rejected, no connection is available, or the
     *         insert fails
     */
    public boolean add(String table,AddService en)
    {
        if(table==null||!table.matches("[A-Za-z_][A-Za-z0-9_]*"))
        {
            System.err.println("Add.add: rejected table name: "+table);
            return false;
        }
        String sql="insert into "+table+"(sheng,xinzeng,leiji,zhiyu,siwang,date) values(?,?,?,?,?,?)";
        // try-with-resources closes the statement and the connection on every
        // path, including when executeUpdate throws; a null resource is skipped.
        try(Connection conn=DBUtil.getConn())
        {
            if(conn==null)
            {
                // DBUtil.getConn() returns null when the connection could not be opened.
                return false;
            }
            try(java.sql.PreparedStatement ps=conn.prepareStatement(sql))
            {
                ps.setString(1,en.getSheng());
                ps.setString(2,en.getXinzeng());
                ps.setString(3,en.getLeiji());
                ps.setString(4,en.getZhiyu());
                ps.setString(5,en.getSiwang());
                ps.setString(6,en.getTime());
                return ps.executeUpdate()!=0;
            }
        }
        catch(SQLException e)
        {
            // Report the failure instead of hiding it; callers still just get false.
            e.printStackTrace();
            return false;
        }
    }

}

AddService文件内容:

package test;


import java.sql.Time;
import java.text.SimpleDateFormat;
import java.util.Date;



public class AddService {
    String sheng;
    String xinzeng;
    String leiji;
    String zhiyu;
    String siwang;
    String time;
    public String getSheng() {
        return sheng;
    }
    public void setSheng(String sheng) {
        this.sheng = sheng;
    }
    public String getXinzeng() {
        return xinzeng;
    }
    public void setXinzeng(String xinzeng) {
        this.xinzeng = xinzeng;
    }
    public String getLeiji() {
        return leiji;
    }
    public void setLeiji(String leiji) {
        this.leiji = leiji;
    }
    public String getZhiyu() {
        return zhiyu;
    }
    public void setZhiyu(String zhiyu) {
        this.zhiyu = zhiyu;
    }
    public String getSiwang() {
        return siwang;
    }
    public void setSiwang(String siwang) {
        this.siwang = siwang;
    }
    
    public String getTime() {
        return time;
    }
    public void setTime(String time) {
        this.time = time;
    }
    public static void main(String[] args) {
        Add a=new Add();
        AddService as=new AddService();
        as.setSheng("湖北");
        as.setXinzeng("13");
        as.setLeiji("67773");
        as.setZhiyu("49056");
        as.setSiwang("3046");
        Date currentTime=new Date();
        SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        String dateString = formatter.format(currentTime);
        as.setTime(dateString);
        a.add("myinfo", as);
    }

}

DBUtil文件内容:

package test;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

/**
 * Database connection helper: opens JDBC connections with the settings held
 * in the static fields below and closes JDBC resources.
 *
 * @author Hu
 */
public class DBUtil {
    
    // NOTE(review): URL and credentials are hard-coded; consider moving them to configuration.
    public static String db_url = "jdbc:mysql://localhost:3306/yonghucaozuo?useSSL=false&serverTimezone=UTC&characterEncoding=UTF-8";
    public static String db_user = "root";
    public static String db_pass = "20183629";
    
    /**
     * Opens a new connection using {@link #db_url}, {@link #db_user} and
     * {@link #db_pass}.
     *
     * @return the open connection, or {@code null} if the driver class could
     *         not be loaded or the connection attempt failed (the stack trace
     *         is printed in that case)
     */
    public static Connection getConn () {
        Connection conn = null;
        
        try {
            // Load the JDBC driver.
            // NOTE(review): this is the legacy driver class name; confirm it matches the Connector/J version in use.
            Class.forName("com.mysql.jdbc.Driver");
            conn = DriverManager.getConnection(db_url, db_user, db_pass);
        } catch (Exception e) {
            e.printStackTrace();
        }
        
        return conn;
    }
    
    /**
     * Closes the statement and then the connection. A {@code null} argument is
     * skipped; a failure to close one resource is printed and does not stop
     * the other from being closed.
     *
     * @param state statement to close, may be {@code null}
     * @param conn  connection to close, may be {@code null}
     */
    public static void close (Statement state, Connection conn) {
        if (state != null) {
            try {
                state.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
        
        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }
    
    /**
     * Closes the result set, then the statement, then the connection, with the
     * same null and error handling as {@link #close(Statement, Connection)}.
     *
     * @param rs    result set to close, may be {@code null}
     * @param state statement to close, may be {@code null}
     * @param conn  connection to close, may be {@code null}
     */
    public static void close (ResultSet rs, Statement state, Connection conn) {
        if (rs != null) {
            try {
                rs.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
        
        close(state, conn);
    }

    /**
     * Manual connectivity check: queries the {@code course} table and prints
     * "不空" (not empty) if it has at least one row, otherwise "空" (empty).
     *
     * @throws SQLException if no connection can be opened or the query fails
     */
    public static void main(String[] args) throws SQLException {
        Connection conn = getConn();
        if (conn == null) {
            // Fail with a clear message instead of a NullPointerException.
            throw new SQLException("Could not open a connection to " + db_url);
        }
        PreparedStatement pstmt = null;
        ResultSet rs = null;
        try {
            pstmt = conn.prepareStatement("select * from course");
            rs = pstmt.executeQuery();
            // rs.next() is true when the result has at least one row.
            if (rs.next()) {
                System.out.println("不空");
            } else {
                System.out.println("空");
            }
        } finally {
            close(rs, pstmt, conn);
        }
    }
}

Get文件内容:

package test;

import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;



/**
 * Reads stored rows back from the database as name/value maps.
 */
public class Get {
    
    /**
     * Returns one map per row of {@code table} whose {@code date} column
     * equals {@code id}. Each map holds the row's second column under
     * {@code "name"} and its fourth column under {@code "value"}.
     *
     * <p>The date value is bound through a prepared statement. The table name
     * cannot be bound, so it is only accepted when it is a plain identifier
     * (letters, digits and underscores).
     *
     * @param table name of the table to query
     * @param id    value the {@code date} column must equal
     * @return the matching rows; empty if the table name is rejected or no
     *         connection is available; rows read before a query failure are
     *         still returned
     */
    public static List find(String table,String id)
    {
        List<Map<String,Object>> rows=new ArrayList<Map<String,Object>>();
        if(table==null||!table.matches("[A-Za-z_][A-Za-z0-9_]*"))
        {
            System.err.println("Get.find: rejected table name: "+table);
            return rows;
        }
        String sql="select * from "+table+" where date = ?";
        // try-with-resources closes result set, statement and connection on
        // every path; a null resource is skipped.
        try(Connection conn=DBUtil.getConn())
        {
            if(conn==null)
            {
                // DBUtil.getConn() returns null when the connection could not be opened.
                return rows;
            }
            try(java.sql.PreparedStatement ps=conn.prepareStatement(sql))
            {
                ps.setString(1,id);
                try(ResultSet rs=ps.executeQuery())
                {
                    while(rs.next())
                    {
                        Map<String,Object> row=new HashMap<String,Object>();
                        // NOTE(review): columns are read by position from "select *";
                        // confirm positions 2 and 4 against the table definition.
                        row.put("name", rs.getString(2));
                        row.put("value", rs.getString(4));
                        rows.add(row);
                    }
                }
            }
        }
        catch(java.sql.SQLException e)
        {
            // Report the failure instead of hiding it.
            e.printStackTrace();
        }
        return rows;
    }
}

JsoupTestTitle文件内容:

package test;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlInput;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.html.HtmlSubmitInput;

import bean.ShengBean;
 
/**
 * Loads a web page with HtmlUnit (so its JavaScript runs), parses the rendered
 * markup with jsoup and extracts one {@link AddService} per table row.
 */
public class JsoupTestTitle {
    // Exact "class" attribute values used to pick cells out of a row.
    // NOTE(review): these look build-generated; verify them against the live page.
    private static final String PROVINCE_CLASS = "VirusTable_1-1-203_MdE8uT";
    private static final String NEW_CASES_CLASS = "VirusTable_1-1-203_3x1sDV VirusTable_1-1-203_2bK5NN";
    private static final String TOTAL_CASES_CLASS = "VirusTable_1-1-203_3x1sDV";
    private static final String OUTCOME_CLASS = "VirusTable_1-1-203_EjGi8c";

    public static void main(String[] args) {
        JsoupTestTitle.getWuMaoW("");
    }
 
    /**
     * Scrapes https://voice.baidu.com/act/newpneumonia/newpneumonia/ and
     * returns one record per {@code tr} found under the element with id
     * {@code nationTable}.
     *
     * @param date text stored as the time of every returned record
     * @return the scraped records; empty if the page could not be loaded or
     *         the table element is missing
     */
    public static List<AddService> getWuMaoW(String date) {
        List<AddService> list=new ArrayList<AddService>();
        String url = "https://voice.baidu.com/act/newpneumonia/newpneumonia/";
        
        // try-with-resources closes the WebClient on every path.
        try (WebClient webClient = new WebClient(BrowserVersion.CHROME)) {
            // Enable JavaScript; the page is parsed only after its scripts have run.
            webClient.getOptions().setJavaScriptEnabled(true);
            webClient.getOptions().setCssEnabled(false);
            webClient.getOptions().setActiveXNative(false);
            webClient.getOptions().setThrowExceptionOnScriptError(false);
            webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
            webClient.getOptions().setTimeout(8000);
            HtmlPage rootPage = webClient.getPage(url);
            // Give background JavaScript up to 6000 ms to finish.
            webClient.waitForBackgroundJavaScript(6000);
            Document doc = Jsoup.parse(rootPage.asXml());
            System.out.println("进去");
            Element listDiv = doc.getElementById("nationTable");
            if (listDiv == null) {
                // Without this check a changed or partially loaded page caused a NullPointerException.
                System.err.println("JsoupTestTitle: element with id nationTable not found at " + url);
            } else {
                list.addAll(parseRows(listDiv, date));
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        System.out.println("我不好");
        return list;
    }

    /**
     * Builds one record per {@code tr} inside every {@code tbody} of
     * {@code table}.
     */
    private static List<AddService> parseRows(Element table, String date) {
        List<AddService> rows = new ArrayList<AddService>();
        // NOTE(review): these values deliberately persist across rows, as in the
        // original code, so a row missing a cell reuses the previous row's value.
        // Confirm whether that is intended.
        String sheng = "";
        String xinzeng = "";
        String leiji = "";
        String zhiyu = "";
        String siwang = "";

        for (Element tbody : table.getElementsByTag("tbody")) {
            for (Element tr : tbody.getElementsByTag("tr")) {
                for (Element cell : tr.getElementsByAttributeValue("class", PROVINCE_CLASS)) {
                    sheng = cell.text().trim();
                    System.out.println(sheng);
                }
                xinzeng = lastCellText(tr, NEW_CASES_CLASS, xinzeng);
                leiji = lastCellText(tr, TOTAL_CASES_CLASS, leiji);

                // The first matching cell is the recovered count; any later one is the death count.
                int index = 0;
                for (Element cell : tr.getElementsByAttributeValue("class", OUTCOME_CLASS)) {
                    String text = cell.text().trim();
                    if (index == 0) {
                        zhiyu = text;
                    } else {
                        siwang = text;
                    }
                    index++;
                }

                System.out.println();
                AddService as = new AddService();
                as.setSheng(sheng);
                as.setXinzeng(xinzeng);
                as.setLeiji(leiji);
                as.setZhiyu(zhiyu);
                as.setSiwang(siwang);
                as.setTime(date);
                rows.add(as);
            }
        }
        return rows;
    }

    /**
     * Returns the trimmed text of the last element in {@code row} whose class
     * attribute equals {@code cssClass}, or {@code fallback} if none matches.
     */
    private static String lastCellText(Element row, String cssClass, String fallback) {
        String text = fallback;
        for (Element cell : row.getElementsByAttributeValue("class", cssClass)) {
            text = cell.text().trim();
        }
        return text;
    }
 
}

servlet文件内容:

package test;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;

import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import net.sf.json.JSONArray;



/**
 * Servlet implementation class servlet
 */
@WebServlet("/pachongservlet")
public class servlet extends HttpServlet {
    private static final long serialVersionUID = 1L;
       
    /**
     * @see HttpServlet#HttpServlet()
     */
    public servlet() {
        super();
        // TODO Auto-generated constructor stub
    }
    protected void service(HttpServletRequest arg0, HttpServletResponse arg1) throws ServletException, IOException
    {
        arg1.setContentType("text/html;charset=utf-8");
        arg0.setCharacterEncoding("utf-8");
        arg1.setCharacterEncoding("utf-8");
        Date currentTime=new Date();
        SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        String date = formatter.format(currentTime);
        System.out.println("11111");
        List<AddService>all=JsoupTestTitle.getWuMaoW(date);
        Add a=new Add();
        System.out.println("22222");
        for(AddService as:all)
        {
            a.add("myinfo", as);
        }
        System.out.println("33333");
        List list=Get.find("myinfo",date);
        arg0.setAttribute("mapDataJson", JSONArray.fromObject(list));
        
        
        /*
         * List<TwoBean> all=Get.find1("info", date); Gson gson = new Gson(); String
         * json = gson.toJson(all); arg0.setAttribute("message", json);
         */
        
        System.out.println("44444");
        arg0.getRequestDispatcher("NewFile.jsp").forward(arg0, arg1);
        return;
        /*map = new HashMap<String, Object>();
        map.put("name","河北");
        map.put("value",2400);
        list.add(map);*/

    }

    /**
     * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)
     */
    protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        // TODO Auto-generated method stub
        response.getWriter().append("Served at: ").append(request.getContextPath());
    }

    /**
     * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)
     */
    protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        // TODO Auto-generated method stub
        doGet(request, response);
    }

}

NewFile文件内容:

<%@ page language="java" contentType="text/html; charset=UTF-8"
    pageEncoding="UTF-8"%>
<%--
  Draws the records passed in the "mapDataJson" request attribute on an
  ECharts map of China. The form below posts to "pachongservlet".
--%>
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Insert title here</title>
</head>
<body>
    <script src="${pageContext.request.contextPath}/js/echarts.min.js"></script>
    <script src="${pageContext.request.contextPath}/js/jquery-1.8.3.js"></script>
<script src="http://echarts.baidu.com/build/dist/echarts.js"></script>
<div id="mainMap" style="height:300px;width:600px;"></div>


<form action="pachongservlet" method="post" style="margin-top:100px">
        <p><input type="submit" value="刷新" style="width:120px;height:35px;margin-left:400px;background-color:#bdd7f2"></p>
    </form>

<script type="text/javascript">
    // JSON text written by the server; it is an empty string when the request
    // attribute is missing (for example when this page is opened directly).
    var mapDataJson = '${mapDataJson}';
    // Fall back to an empty data set instead of letting JSON.parse('') throw.
    var json = mapDataJson ? JSON.parse(mapDataJson) : [];

    // Module path configuration.
    require.config({
        paths: {
            echarts: 'http://echarts.baidu.com/build/dist'
        }
    });
    // Load ECharts and the chart modules that are needed.
    require(
            [
                'echarts',
                'echarts/chart/map' // map module; load other chart modules (e.g. bar) on demand
            ],
            function (ec) {
                // Initialise the chart on the prepared DOM element.
                var myChart_map = ec.init(document.getElementById('mainMap'));

                var itemStyle = {
                    normal:{label:{
                        show:true,
                        formatter:'{b}',
                        textStyle: {fontSize: 10,fontWeight : 'bold'}
                    }},
                    emphasis:{label:{show:true}}
                };

                var option_map = {
                    title : {
                        text: '',
                        subtext: '累计确诊人数',
                        x:'center'
                    },
                    tooltip : {
                        trigger: 'item'
                    },
                    legend: {
                        orient: 'vertical',
                        x:'left',
                        data:['次数']
                    },
                    dataRange: {
                        min: 0,
                        max: 2500,
                        x: 'left',
                        y: 'bottom',
                        text:['高','低'],           // labels; numeric text is used by default
                        calculable : true
                    },

                    series : [
                        {
                            name: '总数',
                            type: 'map',
                            mapType: 'china',
                            roam: false,
                            itemStyle: itemStyle,
                            data:getData()
                        }

                    ]
                };

                // Hand the options, including the data, to the chart.
                myChart_map.setOption(option_map);
            }
    );

    // Returns the array parsed from the server-supplied JSON.
    function getData(){
        return json;
    }
</script>

</body>
</html>

接下来是所爬取的网站图片:

 

 

 

在完成这个小demo的过程中,我最初的做法是每爬取到一组数据就立即将其导入数据库,结果报了很多错误。后来我改变了方式:先将爬取到的数据存到List中,再统一导入数据库,然后由servlet读取数据库中的内容,以地图的形式展示信息。

 

以上是关于jsoup实现网络爬虫并且以地图形式展现的主要内容,如果未能解决你的问题,请参考以下文章

Android实战——jsoup实现网络爬虫,糗事百科项目的起步

Java实现网络爬虫

网络爬虫入门系列 (Jsoup)

java网络爬虫实现信息的抓取

网络爬虫

jsoup爬虫的底层原理