下载网页的基本方法
Posted ZepheryWen
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了下载网页的基本方法相关的知识,希望对你有一定的参考价值。
一、java.net.URL
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;

/**
 * Downloads a page by opening a stream on a {@link URL} and reading it line by line.
 */
public class RetrivePage {
    /**
     * Fetches the resource at {@code path} and returns its text.
     *
     * <p>The content is decoded as UTF-8. Lines are concatenated directly, so the
     * line terminators of the original document are NOT part of the result.
     *
     * @param path absolute URL of the resource to download (any scheme supported by {@link URL})
     * @return the concatenated lines of the resource; empty string for an empty resource
     * @throws Exception if the URL is malformed or the stream cannot be opened or read
     */
    public static String downloadPage(String path) throws Exception {
        URL pageURL = new URL(path);
        StringBuilder pageBuffer = new StringBuilder();
        // try-with-resources closes the reader (and the underlying connection stream)
        // even when reading fails part-way through.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(pageURL.openStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                pageBuffer.append(line);
            }
        }
        return pageBuffer.toString();
    }

    /** Downloads a sample page and prints it to standard output. */
    public static void main(String args[]) throws Exception {
        System.out.println(RetrivePage.downloadPage("http://www.sina.com"));
    }
}
二、Scanner对象
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Scanner;

/**
 * Downloads a page by reading the URL stream through a {@link Scanner}.
 */
public class RetrivePage {
    /**
     * Fetches the resource at {@code path} and returns its text, decoded as UTF-8.
     *
     * <p>The scanner's delimiter is the end-of-input anchor, so each token is
     * everything read up to the end of the stream; line terminators are kept.
     *
     * @param path absolute URL of the resource to download (any scheme supported by {@link URL})
     * @return the text of the resource; empty string for an empty resource
     * @throws Exception if the URL is malformed or the stream cannot be opened
     */
    public static String downloadPage(String path) throws Exception {
        URL pageURL = new URL(path);
        StringBuilder pageBuffer = new StringBuilder();
        // Closing the scanner also closes the wrapped reader and connection stream.
        try (Scanner scanner = new Scanner(
                new InputStreamReader(pageURL.openStream(), StandardCharsets.UTF_8))) {
            scanner.useDelimiter("\\z");
            while (scanner.hasNext()) {
                pageBuffer.append(scanner.next());
            }
        }
        return pageBuffer.toString();
    }

    /** Downloads a sample page and prints it to standard output. */
    public static void main(String args[]) throws Exception {
        System.out.println(RetrivePage.downloadPage("http://www.sina.com"));
    }
}
三、套接字
import java.io.*;
import java.net.Socket;
import java.nio.charset.StandardCharsets;

/**
 * Downloads a page by speaking HTTP/1.0 directly over a TCP socket.
 */
public class RetrivePage {
    /**
     * Sends a GET request for a fixed host and path and prints the raw response
     * (status line, headers and body) to standard output, one line at a time.
     *
     * @throws Exception if the connection cannot be established or an I/O error occurs
     */
    public static void main(String args[]) throws Exception {
        String host = "blog.csdn.net";
        String file = "/column.html";
        int port = 80;
        // Closing the socket also closes its input and output streams.
        try (Socket s = new Socket(host, port)) {
            OutputStream out = s.getOutputStream();
            PrintWriter outw = new PrintWriter(out, false);
            // Request line is "METHOD SP Request-URI SP HTTP-Version"; the space
            // after GET is mandatory, otherwise the server sees "GET/column.html".
            outw.print("GET " + file + " HTTP/1.0\r\n");
            // Host lets name-based virtual hosts route the request to the right site.
            outw.print("Host: " + host + "\r\n");
            outw.print("Accept:text/plain,text/html,text/*\r\n");
            outw.print("\r\n");
            outw.flush();
            InputStream in = s.getInputStream();
            // NOTE(review): assumes the response is UTF-8 encoded — confirm for other hosts.
            InputStreamReader inr = new InputStreamReader(in, StandardCharsets.UTF_8);
            BufferedReader bufferedReader = new BufferedReader(inr);
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                System.out.println(line);
            }
        }
    }
}
四、HttpClient
1 import org.apache.http.HttpEntity; 2 import org.apache.http.HttpResponse; 3 import org.apache.http.client.HttpClient; 4 import org.apache.http.client.methods.HttpGet; 5 import org.apache.http.impl.client.DefaultHttpClient; 6 import org.apache.http.util.EntityUtils; 7 public class RetrivePage { 8 public static void main(String args[]) throws Exception { 9 HttpClient httpClient=new DefaultHttpClient(); 10 HttpGet httpGet=new HttpGet("http://www.sina.com"); 11 HttpResponse response=httpClient.execute(httpGet); 12 HttpEntity entity=response.getEntity(); 13 if(entity!=null){ 14 System.out.println(EntityUtils.toString(entity,"utf-8")); 15 EntityUtils.consume(entity); 16 } 17 httpClient.getConnectionManager().shutdown(); 18 } 19 }
以上是关于下载网页的基本方法的主要内容,如果未能解决你的问题,请参考以下文章