各种获取页面的方法

Posted 2020-10-21 倪子

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了各种获取页面的方法相关的知识，希望对你有一定的参考价值。

这些是程序中时不时会用到的方法，目前还没有整理好，等回头再整理一份。

 /// <summary>
 /// Downloads the resource at <paramref name="url"/> through <see cref="WebClient.OpenRead(string)"/>
 /// and returns the body decoded as UTF-8 text.
 /// </summary>
 /// <param name="url">Address of the resource to download.</param>
 /// <returns>The full response body as a string.</returns>
 static string GetWebClient(string url)
        {
            // using blocks release the client, the stream and the reader even if the read throws.
            using (WebClient myWebClient = new WebClient())
            using (Stream myStream = myWebClient.OpenRead(url))
            using (StreamReader sr = new StreamReader(myStream, System.Text.Encoding.GetEncoding("utf-8")))
            {
                return sr.ReadToEnd();
            }
        }

/// <summary>
/// Fetches <paramref name="url"/> with a plain <see cref="WebRequest"/> and returns the
/// response body decoded as UTF-8 text.
/// </summary>
/// <param name="url">Absolute URI of the resource to fetch.</param>
/// <returns>The full response body as a string.</returns>
static string GetWebRequest(string url)
        {
            Uri uri = new Uri(url);
            WebRequest myReq = WebRequest.Create(uri);

            // Dispose response, stream and reader on every path so a failed read
            // does not leave the underlying connection open.
            using (WebResponse result = myReq.GetResponse())
            using (Stream receviceStream = result.GetResponseStream())
            using (StreamReader readerOfStream = new StreamReader(receviceStream, System.Text.Encoding.GetEncoding("utf-8")))
            {
                return readerOfStream.ReadToEnd();
            }
        }

/// <summary>
/// Fetches <paramref name="url"/> with an <see cref="HttpWebRequest"/> that sends browser-like
/// headers (User-Agent, Accept, Accept-Language, keep-alive) and returns the response body
/// decoded as UTF-8 text.
/// </summary>
/// <param name="url">Absolute URI of the page to fetch.</param>
/// <returns>The full response body as a string.</returns>
static string GetHttpWebRequest(string url)
        {
            Uri uri = new Uri(url);
            HttpWebRequest myReq = (HttpWebRequest)WebRequest.Create(uri);

            // NOTE(review): the Host header is hard-coded, so every request announces
            // "tandfonline.com" regardless of the host in url — confirm this is intended
            // before reusing the method for other sites.
            myReq.Host = "tandfonline.com";
            myReq.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0";
            myReq.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
            myReq.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");
            myReq.KeepAlive = true;

            // Dispose response, stream and reader on every path so a failed read
            // does not leave the underlying connection open.
            using (HttpWebResponse result = (HttpWebResponse)myReq.GetResponse())
            using (Stream receviceStream = result.GetResponseStream())
            using (StreamReader readerOfStream = new StreamReader(receviceStream, System.Text.Encoding.GetEncoding("utf-8")))
            {
                return readerOfStream.ReadToEnd();
            }
        }

/// <summary>
/// Fetches <paramref name="URL"/> with an <see cref="HttpWebRequest"/> and returns the response
/// body decoded as UTF-8 text.
/// </summary>
/// <param name="URL">Absolute URI of the page to fetch.</param>
/// <returns>
/// The response body on success. On any exception (including a malformed URI) the exception's
/// message is returned instead, so callers cannot tell an error apart from content by type alone.
/// </returns>
static string GetContentFromUrl(string URL)
        {
            try
            {
                HttpWebRequest httpReq = (HttpWebRequest)WebRequest.Create(new Uri(URL));

                // Dispose response, stream and reader on every path; ReadToEnd replaces the
                // manual 256-char loop whose repeated string concatenation was quadratic.
                using (HttpWebResponse httpResp = (HttpWebResponse)httpReq.GetResponse())
                using (Stream respStream = httpResp.GetResponseStream())
                using (StreamReader respStreamReader = new StreamReader(respStream, System.Text.Encoding.UTF8))
                {
                    return respStreamReader.ReadToEnd();
                }
            }
            catch (Exception ex)
            {
                // Kept for backward compatibility: failures are reported as the message text.
                return ex.Message;
            }
        }

/// <summary>
/// Downloads <paramref name="url"/> with <see cref="WebClient.DownloadData(string)"/> using the
/// default credentials and returns the bytes decoded as UTF-8 text.
/// </summary>
/// <param name="url">Address of the resource to download.</param>
/// <returns>
/// The decoded page content, or the <see cref="WebException"/> message if the download fails.
/// </returns>
static string GetContentFromUrl1(string url)
        {
            try
            {
                // The using block releases the client even when the download throws.
                using (WebClient client = new WebClient())
                {
                    // Send the default credentials of the current security context with the request.
                    client.Credentials = CredentialCache.DefaultCredentials;
                    byte[] pageData = client.DownloadData(url);
                    return System.Text.Encoding.UTF8.GetString(pageData);
                }
            }
            catch (WebException ex)
            {
                // Kept for backward compatibility: failures are reported as the message text.
                return ex.Message;
            }
        }

/// <summary>
/// Fetches <paramref name="Url"/> over HTTP and returns the body decoded as UTF-8 text,
/// retrying a bounded number of times when the request fails with a <see cref="WebException"/>.
/// </summary>
/// <param name="Url">
/// Address to fetch. "http://" is prepended when the value starts with neither "http://" nor "https://".
/// </param>
/// <returns>
/// The response body when the status is 200 OK; an empty string for any other non-throwing status;
/// <c>null</c> for "about:blank" or when every attempt failed.
/// </returns>
static string GetStringByUrl(string Url)
        {
            if (Url.Equals("about:blank")) return null;
            if (!Url.StartsWith("http://") && !Url.StartsWith("https://")) { Url = "http://" + Url; }

            // Every failed attempt counts towards the limit. The previous version only counted
            // ConnectFailure, so any other WebException (404, timeout, ...) retried forever.
            const int MaxAttempts = 5;
            for (int attempt = 0; attempt < MaxAttempts; attempt++)
            {
                try
                {
                    HttpWebRequest httpWebRequest = (HttpWebRequest)WebRequest.Create(Url);

                    // NOTE(review): the value itself starts with "User-Agent: ", so the header name
                    // is duplicated on the wire — left unchanged, confirm whether it is intentional.
                    httpWebRequest.UserAgent = "User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705;)";
                    httpWebRequest.Accept = "*/*";
                    httpWebRequest.KeepAlive = true;
                    httpWebRequest.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");

                    // Dispose the response and reader on every path, including a failed read.
                    using (HttpWebResponse httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse())
                    {
                        if (httpWebResponse.StatusCode != HttpStatusCode.OK)
                        {
                            return string.Empty;
                        }

                        using (StreamReader sreader = new StreamReader(httpWebResponse.GetResponseStream(), System.Text.Encoding.GetEncoding("utf-8")))
                        {
                            return sreader.ReadToEnd();
                        }
                    }
                }
                catch (WebException e)
                {
                    // ReDial is defined elsewhere; it is invoked only when the connection itself failed.
                    if (e.Status == WebExceptionStatus.ConnectFailure)
                    {
                        ReDial();
                    }
                }
            }

            return null;
        }

下面的方法调用 WebBrowser，目的是等页面加载完成后再取数据。提示：据说此代码运行没有前面的方法快，所以不到需要的时候不要用。

　　　　[STAThread]
static void Main(string[] args)

   [STAThread]
static void Main(string[] args)
        {

                      var html = GetPageStringbyWebBrowser(url); 
                           
        }

 /// <summary>
 /// Loads <paramref name="url"/> in a WinForms <c>WebBrowser</c> control, waits until the control
 /// reports the document as complete, and returns the document stream decoded with the
 /// document's own encoding.
 /// </summary>
 /// <param name="url">
 /// Address to load. "http://" is prepended when the value starts with neither "http://" nor "https://".
 /// </param>
 /// <returns>
 /// The page markup, or an empty string for "about:blank" or when the document cannot be read.
 /// </returns>
 private static string GetPageStringbyWebBrowser(string url)
        {
            // Return immediately for about:blank. The original only assigned "" and then fell
            // through, navigating to "http://about:blank".
            if (url.Equals("about:blank")) return "";
            if (!url.StartsWith("http://") && !url.StartsWith("https://")) { url = "http://" + url; }

            // The using block disposes the control even if Navigate or the wait loop throws.
            using (WebBrowser myWB = new WebBrowser())
            {
                myWB.ScrollBarsEnabled = false;
                myWB.Navigate(url);

                // Pump the message loop until the control reports completion.
                // NOTE(review): there is no timeout, so a page that never completes blocks here forever.
                while (myWB.ReadyState != WebBrowserReadyState.Complete)
                {
                    System.Windows.Forms.Application.DoEvents();
                }

                try
                {
                    using (System.IO.StreamReader getReader = new System.IO.StreamReader(myWB.DocumentStream, System.Text.Encoding.GetEncoding(myWB.Document.Encoding)))
                    {
                        return getReader.ReadToEnd();
                    }
                }
                catch (Exception)
                {
                    // Best effort, as in the original: any failure reading the document yields "".
                    return "";
                }
            }
        }

以上是关于各种获取页面的方法的主要内容，如果未能解决你的问题，请参考以下文章