jsoup爬取某网站安全数据
Posted 西北逍遥
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了jsoup爬取某网站安全数据相关的知识,希望对你有一定的参考价值。
jsoup爬取某网站安全数据
package com.vfsd.net; import java.io.IOException; import java.sql.SQLException; import java.util.Map; import javax.servlet.ServletException; import javax.servlet.annotation.WebServlet; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import com.vfsd.dao.Managemysql; /** * Servlet implementation class GetURL13 */ @WebServlet("/GetURL13") public class GetURL13 extends HttpServlet { private static final long serialVersionUID = 1L; /** * @see HttpServlet#HttpServlet() */ public GetURL13() { super(); // TODO Auto-generated constructor stub } private String message; @Override public void init() throws ServletException { message = "Hello world, this message is from servlet!"; System.out.println("------"+message); try { ManageMySQL.getConnection(); } catch (SQLException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } /** * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response) */ protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { // TODO Auto-generated method stub //response.getWriter().append("Served at: ").append(request.getContextPath()); String agent1 = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (Khtml, like Gecko) Chrome/74.0.3729.169 Safari/537.36"; int pageNum=1; int pageSize=10; //for(pageNum=1;pageNum<101;pageNum++) for(pageNum=1;pageNum<924;pageNum++) { try { int page1= (pageNum-1)*pageSize; Map<Integer,String> map1 = ManageMySQL.getNewsLinkInTable(page1,pageSize,"data_bjszfhcxjswyh"); for(Integer key : map1.keySet()) { System.out.println(key+" "+map1.get(key)); String news_link = map1.get(key); String context1=""; String source1=""; String publishDate = ""; //String context1 = getContentByURL(news_link).replace(" ", ""); if(!news_link.contains("void")) { if(news_link.endsWith("html")) { Document documentRoot = Jsoup.connect(news_link).userAgent(agent1).get(); Elements elements2 = documentRoot.select("#content_list"); //Elements elements2_1 = documentRoot.select("div.div_right"); if(elements2.size()==1) { Element div_ele = elements2.get(0); context1 = div_ele.text(); ManageMySQL.updateContextAndPublishDate2(key, context1.replace("‘", "").replace(""", ""),source1,publishDate,"data_bjszfhcxjswyh"); } } } } } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } } /** * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response) */ protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { // TODO Auto-generated method stub doGet(request, response); } }