获取URL列表,设置代理请求URL,https的加密方式处理
Posted 雪山非猪
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了获取URL列表,设置代理请求URL,https的加密方式处理相关的知识,希望对你有一定的参考价值。
做了一个测试的一个小工具,需求如下:
1、有一批URL列表,需要知道哪个URL请求响应内容中包含http:关键字的。
2、url请求包括http和https 2种协议
3、要部署在linux服务器上,且linux服务器只能通过代理来连接外网
帖一下我的核心代码吧:
package com.cn.util; import java.io.BufferedReader; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.io.InputStreamReader; import java.net.HttpURLConnection; import java.net.InetSocketAddress; import java.net.MalformedURLException; import java.net.Proxy; import java.net.URL; import java.net.URLConnection; import java.security.KeyManagementException; import java.security.NoSuchAlgorithmException; import java.util.ArrayList; import java.util.List; import java.util.Map; import javax.net.ssl.HostnameVerifier; import javax.net.ssl.HttpsURLConnection; import javax.net.ssl.SSLContext; import javax.net.ssl.SSLSession; import javax.net.ssl.TrustManager; import javax.net.ssl.X509TrustManager; import javax.security.cert.CertificateException; import javax.security.cert.X509Certificate; import javax.xml.ws.Response; import net.sf.json.JSONObject; public class HttpGet { // SSL private static class TrustAnyTrustManager implements X509TrustManager { public void checkClientTrusted(X509Certificate[] chain, String authType) throws CertificateException { } public void checkServerTrusted(X509Certificate[] chain, String authType) throws CertificateException { } public void checkClientTrusted( java.security.cert.X509Certificate[] arg0, String arg1) throws java.security.cert.CertificateException { // TODO Auto-generated method stub } public void checkServerTrusted( java.security.cert.X509Certificate[] arg0, String arg1) throws java.security.cert.CertificateException { // TODO Auto-generated method stub } public java.security.cert.X509Certificate[] getAcceptedIssuers() { // TODO Auto-generated method stub return null; } } private static class TrustAnyHostnameVerifier implements HostnameVerifier { public boolean verify(String hostname, SSLSession session) { return true; } } // 得到URL public List getUrl(String file) { BufferedReader bf = null; List urlList = new ArrayList(); int linenum = 0; try { bf = new BufferedReader(new FileReader(file)); String info = ""; while ((info = bf.readLine()) != null) { linenum++; if (!"".equals(info)) { /* * if ((info.startsWith("http:") || * info.startsWith("https:"))) { * System.out.println("url===>" + info); urlList.add(info); * }else{ System.out.println("第"+linenum+"行,请求协议或有问题。。:"); } */ System.out.println("url===>" + info); urlList.add(info); } else { System.out.println("第" + linenum + "行url为空"); } } } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { try { bf.close(); } catch (IOException e) { e.printStackTrace(); } } return urlList; } // 获取请求非200状态的url public List<String> http404(List<String> urls) { List<String> result404 = new ArrayList<String>(); try { /* * System.setProperty("proxySet", "true"); * System.setProperty("http.proxyHost", "192.168.11.254"); * System.setProperty("http.proxyPort", "8080"); */ Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress( "192.168.11.254", 8080)); for (String url2 : urls) { if (url2.startsWith("http:") || (url2.startsWith("https:"))) { URL realUrl = new URL(new String(url2.getBytes("utf-8"))); HttpURLConnection connection = (HttpURLConnection) realUrl .openConnection(proxy); // 如果是https if (connection instanceof HttpsURLConnection) { SSLContext sc = SSLContext.getInstance("SSL"); sc.init(null, new TrustManager[] { new TrustAnyTrustManager() }, new java.security.SecureRandom()); ((HttpsURLConnection) connection) .setSSLSocketFactory(sc.getSocketFactory()); ((HttpsURLConnection) connection) .setHostnameVerifier(new TrustAnyHostnameVerifier()); } // 设置通用的请求属性 connection.setRequestProperty("accept", "*/*"); connection.setRequestProperty("connection", "Keep-Alive"); connection .setRequestProperty("user-agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)"); // 连接 connection.connect(); if (connection.getResponseCode() != 200) { result404.add(url2); } } else { result404.add(url2); } } } catch (MalformedURLException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } catch (NoSuchAlgorithmException e) { e.printStackTrace(); } catch (KeyManagementException e) { e.printStackTrace(); } return result404; } // 请求获取响应,把包含搜索关键字的URL存入LIST中 public List<String> sendUrl(List<String> urls, String keyword) { List<String> result200 = new ArrayList<String>(); BufferedReader in = null; try { /* * System.setProperty("proxySet", "true"); * System.setProperty("http.proxyHost", "192.168.11.254"); * System.setProperty("http.proxyPort", "8080"); */ Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress( "192.168.11.254", 8080)); for (String url2 : urls) { StringBuffer response = new StringBuffer(); if (url2.startsWith("http:") || (url2.startsWith("https:"))) { URL realUrl = new URL(new String(url2.getBytes("utf-8"))); HttpURLConnection connection = (HttpURLConnection) realUrl .openConnection(proxy); // 如果是https if (connection instanceof HttpsURLConnection) { SSLContext sc = SSLContext.getInstance("SSL"); sc.init(null, new TrustManager[] { new TrustAnyTrustManager() }, new java.security.SecureRandom()); ((HttpsURLConnection) connection) .setSSLSocketFactory(sc.getSocketFactory()); ((HttpsURLConnection) connection) .setHostnameVerifier(new TrustAnyHostnameVerifier()); } // 设置通用的请求属性 connection.setRequestProperty("accept", "*/*"); connection.setRequestProperty("connection", "Keep-Alive"); connection .setRequestProperty("user-agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)"); // 连接 connection.connect(); if (connection.getResponseCode() == 200) { in = new BufferedReader(new InputStreamReader( connection.getInputStream())); String line = ""; while ((line = in.readLine()) != null) { response.append(line); } in.close(); } if (response.toString().contains(keyword)) { result200.add(url2); } } System.out.println(url2 + "=============>" + response.toString()); } } catch (MalformedURLException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } catch (NoSuchAlgorithmException e) { e.printStackTrace(); } catch (KeyManagementException e) { e.printStackTrace(); } return result200; } public static void main(String[] args) { HttpGet ht = new HttpGet(); List urls = ht.getUrl("E:\\课件\\https\\1.txt"); String keyword = "http:"; List<String> urllist = ht.http404(urls); for (String string : urllist) { System.out.println("404=" + string); } } }
TrustAnyTrustManager 类中的方法,是我在网上找的,目地是解决SSL加密请求,针对https://协议的请求来处理的。具体是什么我也不懂
/* * System.setProperty("proxySet", "true"); * System.setProperty("http.proxyHost", "192.168.11.254"); * System.setProperty("http.proxyPort", "8080"); */
这种设置代理的方式,不行的
Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(
"192.168.11.254", 8080));
HttpURLConnection connection = (HttpURLConnection) realUrl
.openConnection(proxy);
以上是关于获取URL列表,设置代理请求URL,https的加密方式处理的主要内容,如果未能解决你的问题,请参考以下文章