下载网页的基本方法

Posted 2020-08-08 ZepheryWen

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了下载网页的基本方法相关的知识，希望对你有一定的参考价值。

一、java.net.URL

 1 import java.io.BufferedReader;
 2 import java.io.InputStreamReader;
 3 import java.net.URL;
 4 
 5 public class RetrivePage {
 6     public static String downloadPage(String path) throws Exception {
 7         URL pageURL = new URL(path);
 8         BufferedReader reader = new BufferedReader(new InputStreamReader(pageURL.openStream()));
 9         String line;
10         StringBuilder pageBuffer = new StringBuilder();
11         while ((line = reader.readLine()) != null) {
12             pageBuffer.append(line);
13         }
14         return pageBuffer.toString();
15     }
16 
17     public static void main(String args[]) throws Exception {
18         System.out.println(RetrivePage.downloadPage("http://www.sina.com"));
19     }
20 }

二、Scanner对象

 1 import java.io.InputStreamReader;
 2 import java.net.URL;
 3 import java.util.Scanner;
 4 
 5 public class RetrivePage {
 6     public static String downloadPage(String path) throws Exception {
 7         URL pageURL = new URL(path);
 8         Scanner scanner = new Scanner(new InputStreamReader(pageURL.openStream(), "utf-8"));
 9         scanner.useDelimiter("\\z");
10         StringBuilder pageBuffer = new StringBuilder();
11         while (scanner.hasNext()) {
12            pageBuffer.append(scanner.next());
13         }
14         return pageBuffer.toString();
15     }
16 
17     public static void main(String args[]) throws Exception {
18         System.out.println(RetrivePage.downloadPage("http://www.sina.com"));
19     }
20 }

三、套接字

 1 import java.io.*;
 2 import java.net.Socket;
 3 
 4 public class RetrivePage {
 5     public static void main(String args[]) throws Exception {
 6         String host = "blog.csdn.net";
 7         String file = "/column.html";
 8         int port = 80;
 9         Socket s = new Socket(host, port);
10         OutputStream out = s.getOutputStream();
11         PrintWriter outw = new PrintWriter(out, false);
12         outw.print("GET" + file + " HTTP/1.0\r\n");
13         outw.print("Accept:text/plain,text/html,text/*\r\n");
14         outw.print("\r\n");
15         outw.flush();
16         InputStream in = s.getInputStream();
17         InputStreamReader inr = new InputStreamReader(in);
18         BufferedReader bufferedReader = new BufferedReader(inr);
19         String line;
20         while ((line = bufferedReader.readLine()) != null) {
21             System.out.println(line);
22         }
23     }
24 }

四、HttpClient

 1 import org.apache.http.HttpEntity;
 2 import org.apache.http.HttpResponse;
 3 import org.apache.http.client.HttpClient;
 4 import org.apache.http.client.methods.HttpGet;
 5 import org.apache.http.impl.client.DefaultHttpClient;
 6 import org.apache.http.util.EntityUtils;
 7 public class RetrivePage {
 8     public static void main(String args[]) throws Exception {
 9         HttpClient httpClient=new DefaultHttpClient();
10         HttpGet httpGet=new HttpGet("http://www.sina.com");
11         HttpResponse response=httpClient.execute(httpGet);
12         HttpEntity entity=response.getEntity();
13         if(entity!=null){
14             System.out.println(EntityUtils.toString(entity,"utf-8"));
15             EntityUtils.consume(entity);
16         }
17         httpClient.getConnectionManager().shutdown();
18     }
19 }

以上是关于下载网页的基本方法的主要内容，如果未能解决你的问题，请参考以下文章：