jsoup实现网络爬虫并且以地图形式展现
Posted yizhixiaozhu
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了jsoup实现网络爬虫并且以地图形式展现相关的知识,希望对你有一定的参考价值。
本项目用到了jsoup、HtmlUnit和echarts,接下来展示项目目录:
接下来展示各个文件内容:
Add文件内容:
package test;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Statement;

/**
 * Inserts a single {@link AddService} record into a database table.
 */
public class Add {

    /**
     * Inserts the values held by {@code en} as one row of {@code table}.
     *
     * <p>The six column values are bound as statement parameters rather than
     * being concatenated into the SQL text, so quotes or other special
     * characters in the data cannot break or alter the statement.
     *
     * @param table name of the target table. It is spliced into the SQL text
     *              because identifiers cannot be bound as parameters, so it
     *              must come from trusted code, never from user input.
     * @param en    the record whose getters supply the column values
     * @return {@code true} if the insert affected at least one row;
     *         {@code false} if no connection could be obtained, no row was
     *         inserted, or a {@link SQLException} occurred
     */
    public boolean add(String table, AddService en) {
        String sql = "insert into " + table
                + "(sheng,xinzeng,leiji,zhiyu,siwang,date) values(?,?,?,?,?,?)";
        System.out.println(sql);

        // try-with-resources accepts a null resource and simply skips closing it,
        // so the null check can live inside the block.
        try (Connection conn = DBUtil.getConn()) {
            if (conn == null) {
                // DBUtil.getConn() returns null (after printing the cause) when
                // the driver or the connection is unavailable.
                return false;
            }
            try (PreparedStatement ps = conn.prepareStatement(sql)) {
                ps.setString(1, en.getSheng());
                ps.setString(2, en.getXinzeng());
                ps.setString(3, en.getLeiji());
                ps.setString(4, en.getZhiyu());
                ps.setString(5, en.getSiwang());
                ps.setString(6, en.getTime());
                return ps.executeUpdate() != 0;
            }
        } catch (SQLException e) {
            // Report the failure instead of silently swallowing it; the caller
            // still only sees "false", exactly as before.
            e.printStackTrace();
            return false;
        }
    }
}
AddService文件内容:
package test;

import java.sql.Time;
import java.text.SimpleDateFormat;
import java.util.Date;

/**
 * Simple mutable holder for one row of scraped data.
 *
 * <p>All values are kept as strings. The field names appear to be pinyin
 * (presumably sheng = province, xinzeng = new cases, leiji = cumulative,
 * zhiyu = recovered, siwang = deaths) - confirm against the data source.
 */
public class AddService {

    String sheng;
    String xinzeng;
    String leiji;
    String zhiyu;
    String siwang;
    String time;

    /** @return the {@code sheng} value */
    public String getSheng() {
        return sheng;
    }

    /** @param sheng the {@code sheng} value to store */
    public void setSheng(String sheng) {
        this.sheng = sheng;
    }

    /** @return the {@code xinzeng} value */
    public String getXinzeng() {
        return xinzeng;
    }

    /** @param xinzeng the {@code xinzeng} value to store */
    public void setXinzeng(String xinzeng) {
        this.xinzeng = xinzeng;
    }

    /** @return the {@code leiji} value */
    public String getLeiji() {
        return leiji;
    }

    /** @param leiji the {@code leiji} value to store */
    public void setLeiji(String leiji) {
        this.leiji = leiji;
    }

    /** @return the {@code zhiyu} value */
    public String getZhiyu() {
        return zhiyu;
    }

    /** @param zhiyu the {@code zhiyu} value to store */
    public void setZhiyu(String zhiyu) {
        this.zhiyu = zhiyu;
    }

    /** @return the {@code siwang} value */
    public String getSiwang() {
        return siwang;
    }

    /** @param siwang the {@code siwang} value to store */
    public void setSiwang(String siwang) {
        this.siwang = siwang;
    }

    /** @return the timestamp string stored for this record */
    public String getTime() {
        return time;
    }

    /** @param time the timestamp string to store for this record */
    public void setTime(String time) {
        this.time = time;
    }

    /**
     * Manual smoke test: builds one sample record stamped with the current
     * time and inserts it into the {@code myinfo} table through {@link Add}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Add inserter = new Add();

        AddService sample = new AddService();
        sample.setSheng("湖北");
        sample.setXinzeng("13");
        sample.setLeiji("67773");
        sample.setZhiyu("49056");
        sample.setSiwang("3046");

        Date now = new Date();
        String stamp = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(now);
        sample.setTime(stamp);

        inserter.add("myinfo", sample);
    }
}
DBUtil文件内容:
package test;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

/**
 * Database connection helper.
 *
 * @author Hu
 */
public class DBUtil {

    // NOTE(review): URL and credentials are hard-coded in source; consider
    // moving them to external configuration before sharing or deploying.
    public static String db_url = "jdbc:mysql://localhost:3306/yonghucaozuo?useSSL=false&serverTimezone=UTC&characterEncoding=UTF-8";
    public static String db_user = "root";
    public static String db_pass = "20183629";

    /**
     * Loads the JDBC driver and opens a new connection using {@link #db_url},
     * {@link #db_user} and {@link #db_pass}.
     *
     * @return a new connection, or {@code null} if the driver could not be
     *         loaded or the connection failed (the exception is printed).
     *         Callers must check for {@code null}.
     */
    public static Connection getConn() {
        Connection conn = null;
        try {
            // Load the driver class.
            // NOTE(review): newer MySQL Connector/J releases name this class
            // com.mysql.cj.jdbc.Driver - confirm against the driver on the classpath.
            Class.forName("com.mysql.jdbc.Driver");
            conn = DriverManager.getConnection(db_url, db_user, db_pass);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return conn;
    }

    /**
     * Closes the statement and then the connection, ignoring {@code null}
     * arguments. A failure to close one resource is printed and does not
     * prevent the other from being closed.
     *
     * @param state statement to close, may be {@code null}
     * @param conn  connection to close, may be {@code null}
     */
    public static void close(Statement state, Connection conn) {
        closeQuietly(state);
        closeQuietly(conn);
    }

    /**
     * Closes the result set, the statement and then the connection, ignoring
     * {@code null} arguments. A failure to close one resource is printed and
     * does not prevent the others from being closed.
     *
     * @param rs    result set to close, may be {@code null}
     * @param state statement to close, may be {@code null}
     * @param conn  connection to close, may be {@code null}
     */
    public static void close(ResultSet rs, Statement state, Connection conn) {
        closeQuietly(rs);
        close(state, conn);
    }

    /** Closes one resource if it is non-null, printing any failure. */
    private static void closeQuietly(AutoCloseable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Manual connectivity check: queries the {@code course} table and prints
     * "不空" (not empty) when it has at least one row, otherwise "空" (empty).
     *
     * @param args unused
     * @throws SQLException if preparing or running the query fails
     */
    public static void main(String[] args) throws SQLException {
        String sql = "select * from course";
        // A null resource is allowed in try-with-resources and is simply not closed.
        try (Connection conn = getConn()) {
            if (conn == null) {
                // getConn() has already printed the reason.
                return;
            }
            try (PreparedStatement pstmt = conn.prepareStatement(sql);
                 ResultSet rs = pstmt.executeQuery()) {
                // rs.next() is true when a first row exists, i.e. the table is
                // NOT empty; the two messages were previously swapped.
                if (rs.next()) {
                    System.out.println("不空");
                } else {
                    System.out.println("空");
                }
            }
        }
    }
}
Get文件内容:
package test;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Reads stored rows back from the database as name/value pairs.
 */
public class Get {

    /**
     * Returns one map per row of {@code table} whose {@code date} column
     * equals {@code id}. Each map has two entries: {@code "name"} (result
     * column 2) and {@code "value"} (result column 4), both read as strings.
     *
     * <p>The {@code date} value is bound as a statement parameter, so it cannot
     * alter the SQL text.
     *
     * @param table name of the table to query. It is spliced into the SQL text
     *              because identifiers cannot be bound as parameters, so it
     *              must come from trusted code.
     * @param id    the value the {@code date} column must equal
     * @return a list of {@code Map<String, Object>}, never {@code null}. It is
     *         empty when no connection is available, and holds only the rows
     *         read so far when a {@link SQLException} occurs (the exception is
     *         printed). The declared type stays a raw {@code List} for
     *         compatibility with existing callers.
     */
    public static List find(String table, String id) {
        List<Map<String, Object>> list = new ArrayList<Map<String, Object>>();
        String sql = "select * from " + table + " where date = ?";

        // A null resource is allowed in try-with-resources and is simply not closed.
        try (Connection conn = DBUtil.getConn()) {
            if (conn == null) {
                // DBUtil.getConn() has already printed the reason.
                return list;
            }
            try (PreparedStatement ps = conn.prepareStatement(sql)) {
                ps.setString(1, id);
                try (ResultSet rs = ps.executeQuery()) {
                    while (rs.next()) {
                        Map<String, Object> map = new HashMap<String, Object>();
                        // NOTE(review): positional columns - presumably column 2
                        // is sheng and column 4 is leiji when the table starts
                        // with an id column; verify against the table schema.
                        map.put("name", rs.getString(2));
                        map.put("value", rs.getString(4));
                        list.add(map);
                    }
                }
            }
            System.out.println("over");
            System.out.println(1);
        } catch (SQLException e) {
            // Report the failure instead of silently discarding it.
            e.printStackTrace();
        }
        return list;
    }
}
JsoupTestTitle文件内容:
package test;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlInput;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.html.HtmlSubmitInput;

import bean.ShengBean;

/**
 * Scrapes the table with id {@code nationTable} from the Baidu page at
 * {@link #PAGE_URL} and converts its rows into {@link AddService} records.
 *
 * <p>The page is loaded with HtmlUnit so its JavaScript can run, and the
 * resulting markup is then parsed with jsoup.
 */
public class JsoupTestTitle {

    /** Page that is scraped. */
    private static final String PAGE_URL = "https://voice.baidu.com/act/newpneumonia/newpneumonia/";

    // Exact values of the "class" attribute of the table cells that are read.
    // NOTE(review): these look like generated class names and will presumably
    // change when the page is redeployed - verify before relying on them.
    private static final String CLASS_SHENG = "VirusTable_1-1-203_MdE8uT";
    private static final String CLASS_XINZENG = "VirusTable_1-1-203_3x1sDV VirusTable_1-1-203_2bK5NN";
    private static final String CLASS_LEIJI = "VirusTable_1-1-203_3x1sDV";
    private static final String CLASS_ZHIYU_SIWANG = "VirusTable_1-1-203_EjGi8c";

    /**
     * Manual run: scrapes the page with an empty timestamp.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        JsoupTestTitle.getWuMaoW("");
    }

    /**
     * Loads {@link #PAGE_URL}, waits for its background JavaScript, and turns
     * every {@code tr} inside the {@code tbody} elements of
     * {@code #nationTable} into one {@link AddService}.
     *
     * @param date timestamp string stored on every returned record
     * @return the scraped records, never {@code null}. The list is empty when
     *         the page could not be loaded (the {@link IOException} is printed)
     *         or when the page contains no {@code nationTable} element.
     */
    public static List<AddService> getWuMaoW(String date) {
        // NOTE(review): these values are deliberately NOT reset per row, which
        // matches the previous behaviour: a row lacking a given cell keeps the
        // value of the preceding row. Confirm whether that is intended.
        String sheng = "";
        String xinzeng = "";
        String leiji = "";
        String zhiyu = "";
        String siwang = "";
        List<AddService> list = new ArrayList<AddService>();

        // try-with-resources makes sure the simulated browser is shut down
        // (windows closed, JavaScript stopped) even if loading fails;
        // previously it was never closed.
        try (WebClient webClient = new WebClient(BrowserVersion.CHROME)) {
            // Emulate Chrome with JavaScript enabled; CSS is not needed.
            webClient.getOptions().setJavaScriptEnabled(true);
            webClient.getOptions().setCssEnabled(false);
            webClient.getOptions().setActiveXNative(false);
            webClient.getOptions().setThrowExceptionOnScriptError(false);
            webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
            webClient.getOptions().setTimeout(8000);

            HtmlPage rootPage = webClient.getPage(PAGE_URL);
            // Give the page's background JavaScript time to run.
            webClient.waitForBackgroundJavaScript(6000);

            Document doc = Jsoup.parse(rootPage.asXml());
            System.out.println("进去");

            // Per-province table. getElementById returns null when the element
            // is absent (layout change, or the script did not finish in time);
            // skip parsing in that case instead of failing with an NPE.
            Element listDiv = doc.getElementById("nationTable");
            if (listDiv != null) {
                for (Element tbody : listDiv.getElementsByTag("tbody")) {
                    for (Element tr : tbody.getElementsByTag("tr")) {
                        for (Element cell : tr.getElementsByAttributeValue("class", CLASS_SHENG)) {
                            sheng = cell.text().trim();
                            System.out.println(sheng);
                        }
                        xinzeng = lastTextOr(tr, CLASS_XINZENG, xinzeng);
                        leiji = lastTextOr(tr, CLASS_LEIJI, leiji);

                        // The first matching cell is stored as zhiyu, every
                        // later one overwrites siwang.
                        int i = 0;
                        for (Element cell : tr.getElementsByAttributeValue("class", CLASS_ZHIYU_SIWANG)) {
                            String text = cell.text().trim();
                            if (i == 0) {
                                zhiyu = text;
                            } else {
                                siwang = text;
                            }
                            i++;
                        }
                        System.out.println();

                        AddService as = new AddService();
                        as.setSheng(sheng);
                        as.setXinzeng(xinzeng);
                        as.setLeiji(leiji);
                        as.setZhiyu(zhiyu);
                        as.setSiwang(siwang);
                        as.setTime(date);
                        list.add(as);
                    }
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        System.out.println("我不好");
        return list;
    }

    /**
     * Returns the trimmed text of the last element under {@code row} whose
     * {@code class} attribute equals {@code cssClass} exactly, or
     * {@code fallback} when there is no such element.
     */
    private static String lastTextOr(Element row, String cssClass, String fallback) {
        String result = fallback;
        Elements cells = row.getElementsByAttributeValue("class", cssClass);
        for (Element cell : cells) {
            result = cell.text().trim();
        }
        return result;
    }
}
servlet文件内容:
package test;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;

import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import net.sf.json.JSONArray;

/**
 * Servlet mapped to {@code /pachongservlet}. Each request scrapes the current
 * data, stores it in the {@code myinfo} table, reads it back, and forwards to
 * {@code NewFile.jsp} with the rows exposed as JSON.
 */
@WebServlet("/pachongservlet")
public class servlet extends HttpServlet {

    private static final long serialVersionUID = 1L;

    /**
     * @see HttpServlet#HttpServlet()
     */
    public servlet() {
        super();
    }

    /**
     * Handles every request to this servlet:
     * <ol>
     *   <li>sets UTF-8 encodings on the request and the response;</li>
     *   <li>formats the current time as {@code yyyy-MM-dd HH:mm:ss};</li>
     *   <li>scrapes the records via {@link JsoupTestTitle#getWuMaoW(String)}
     *       and inserts each one into {@code myinfo} via {@link Add};</li>
     *   <li>reads the rows stamped with that time back via {@link Get} and
     *       stores them as a JSON array in the request attribute
     *       {@code mapDataJson};</li>
     *   <li>forwards to {@code NewFile.jsp}.</li>
     * </ol>
     * This override never calls {@link #doGet} or {@link #doPost}.
     */
    protected void service(HttpServletRequest arg0, HttpServletResponse arg1) throws ServletException, IOException {
        arg1.setContentType("text/html;charset=utf-8");
        arg0.setCharacterEncoding("utf-8");
        arg1.setCharacterEncoding("utf-8");

        // One timestamp tags everything inserted during this request and is
        // then used to select exactly those rows again.
        Date now = new Date();
        String date = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(now);

        System.out.println("11111");
        List<AddService> scraped = JsoupTestTitle.getWuMaoW(date);

        Add inserter = new Add();
        System.out.println("22222");
        for (AddService record : scraped) {
            inserter.add("myinfo", record);
        }

        System.out.println("33333");
        List rows = Get.find("myinfo", date);
        arg0.setAttribute("mapDataJson", JSONArray.fromObject(rows));

        System.out.println("44444");
        arg0.getRequestDispatcher("NewFile.jsp").forward(arg0, arg1);
    }

    /**
     * Writes "Served at: " followed by the context path to the response.
     *
     * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)
     */
    protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        String contextPath = request.getContextPath();
        response.getWriter().append("Served at: ").append(contextPath);
    }

    /**
     * Delegates to {@link #doGet}.
     *
     * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)
     */
    protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        doGet(request, response);
    }
}
NewFile文件内容:
<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%>
<%--
  Renders the rows passed in the request attribute "mapDataJson" (a JSON array
  of {name, value} objects) as an ECharts map of China. The form posts back to
  "pachongservlet".
--%>
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Insert title here</title>
</head>
<body>
<script src="${pageContext.request.contextPath}/js/echarts.min.js"></script>
<script src="${pageContext.request.contextPath}/js/jquery-1.8.3.js"></script>
<script src="http://echarts.baidu.com/build/dist/echarts.js"></script>

<div id="mainMap" style="height:300px;width:600px;"></div>

<form action="pachongservlet" method="post" style="margin-top:100px">
    <p><input type="submit" value="刷新" style="width:120px;height:35px;margin-left:400px;background-color:#bdd7f2"></p>
</form>

<script type="text/javascript">
    // JSON text produced by the servlet. It is empty when the page is opened
    // without going through the servlet; JSON.parse('') would throw, so fall
    // back to an empty data set in that case.
    var mapDataJson = '${mapDataJson}';
    var json = mapDataJson ? JSON.parse(mapDataJson) : [];

    // Module path configuration for the ECharts loader.
    require.config({
        paths: {
            echarts: 'http://echarts.baidu.com/build/dist'
        }
    });

    // Load only the modules that are needed (add 'echarts/chart/bar' for bar charts).
    require(
        [
            'echarts',
            'echarts/chart/map'
        ],
        function (ec) {
            // Initialise the chart on the prepared DOM node.
            var myChart_map = ec.init(document.getElementById('mainMap'));

            var itemStyle = {
                normal: {
                    label: {
                        show: true,
                        formatter: '{b}',
                        textStyle: {fontSize: 10, fontWeight: 'bold'}
                    }
                },
                emphasis: {label: {show: true}}
            };

            var option_map = {
                title: {
                    text: '',
                    subtext: '累计确诊人数',
                    x: 'center'
                },
                tooltip: {
                    trigger: 'item'
                },
                // NOTE(review): the legend entry differs from the series name
                // below ('总数') - confirm which label is intended.
                legend: {
                    orient: 'vertical',
                    x: 'left',
                    data: ['次数']
                },
                dataRange: {
                    min: 0,
                    max: 2500,
                    x: 'left',
                    y: 'bottom',
                    text: ['高', '低'], // labels; defaults to numeric text
                    calculable: true
                },
                series: [
                    {
                        name: '总数',
                        type: 'map',
                        mapType: 'china',
                        roam: false,
                        itemStyle: itemStyle,
                        data: getData()
                    }
                ]
            };

            // Apply the options (including the data) to the chart.
            myChart_map.setOption(option_map);
        }
    );

    // Returns the parsed rows supplied by the servlet.
    function getData() {
        return json;
    }
</script>
</body>
</html>
接下来是所爬取的网站图片:
在完成这个小demo的过程中,我最初的做法是每爬取到一组数据就立即将其导入数据库,结果报了很多错误。于是我改变了方式:先将爬取到的数据全部存入List,再统一导入数据库,然后由servlet读取数据库中的内容,以地图形式进行信息展示。
以上是关于jsoup实现网络爬虫并且以地图形式展现的主要内容,如果未能解决你的问题,请参考以下文章