C# 采集
Posted hofmann
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了C# 采集相关的知识,希望对你有一定的参考价值。
C# 网页图片采集
http://blog.csdn.net/a237428367/article/details/5987832
using System;
- using System.Collections.Generic;
- using System.Linq;
- using System.Text;
- using System.Text.RegularExpressions;
- using System.Net;
- using System.IO;
- using System.Windows.Forms;
namespace ImageCollect
{
    /// <summary>
    /// Loads a web page in a <see cref="WebBrowser"/> control and, once the main
    /// document has finished loading, downloads every image referenced by an
    /// <c>img</c> element into a local folder.
    /// </summary>
    /// <remarks>
    /// NOTE(review): <see cref="WebBrowser"/> needs a message loop to raise
    /// <c>DocumentCompleted</c>; this class presumably is used from a WinForms UI
    /// thread - confirm against the caller.
    /// </remarks>
    public class GatherPic
    {
        // Timeout applied to each image request, in milliseconds.
        private const int DownloadTimeoutMs = 10000;

        // Size of the buffer used when copying the response body to disk.
        private const int CopyBufferSize = 1024;

        private string savePath;
        private string getUrl;
        private WebBrowser wb;

        // Number of <img> elements found by the last SearchImgList() call.
        private int iImgCount;

        /// <summary>Stores the page address and the destination folder.</summary>
        /// <param name="sWebUrl">Address of the page whose images are collected.</param>
        /// <param name="sSavePath">Folder the images are written to.</param>
        public GatherPic(string sWebUrl, string sSavePath)
        {
            this.getUrl = sWebUrl;
            this.savePath = sSavePath;
        }

        /// <summary>Starts loading the page; images are saved when loading completes.</summary>
        /// <returns>
        /// <c>false</c> (after showing a message box) when no page address was
        /// supplied, otherwise <c>true</c>.
        /// </returns>
        public bool start()
        {
            // Also covers a null address, which previously threw NullReferenceException.
            if (getUrl == null || getUrl.Trim().Length == 0)
            {
                MessageBox.Show("哪来的虾米连网址都没输!");
                return false;
            }

            // Release the browser left over from an earlier start() call.
            if (this.wb != null)
            {
                this.wb.DocumentCompleted -= DocumentCompleted;
                this.wb.Dispose();
            }

            this.wb = new WebBrowser();
            // Subscribe before navigating so the completion event cannot be missed.
            this.wb.DocumentCompleted += new WebBrowserDocumentCompletedEventHandler(DocumentCompleted);
            this.wb.Navigate(getUrl);
            return true;
        }

        /// <summary>Handles <c>WebBrowser.DocumentCompleted</c>.</summary>
        private void DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
        {
            if (wb == null || wb.Document == null)
            {
                return;
            }

            // The event also fires for every frame/iframe; only react to the main document.
            if (e.Url != wb.Document.Url)
            {
                return;
            }

            SearchImgList();
        }

        /// <summary>
        /// Finds every <c>img</c> element in the loaded document and saves its
        /// <c>src</c> into the destination folder. Does nothing when no document
        /// is loaded.
        /// </summary>
        public void SearchImgList()
        {
            if (this.wb == null || this.wb.Document == null)
            {
                return;
            }

            HtmlElementCollection images = this.wb.Document.GetElementsByTagName("img");
            this.iImgCount = images.Count;

            foreach (HtmlElement image in images)
            {
                SaveImageFromWeb(image.GetAttribute("src"), this.savePath);
            }
        }

        /// <summary>Downloads one remote image into a folder.</summary>
        /// <param name="imgUrl">Absolute http/https address of the image.</param>
        /// <param name="path">Destination folder; created if it does not exist.</param>
        /// <returns>
        /// <c>1</c> when the image was saved; <c>0</c> when the address is not a
        /// usable http/https URL, the response is not an <c>image/*</c> content
        /// type, or the download or file write failed.
        /// </returns>
        public int SaveImageFromWeb(string imgUrl, string path)
        {
            Uri imageUri;
            if (string.IsNullOrEmpty(path) || !TryGetHttpUri(imgUrl, out imageUri))
            {
                return 0;
            }

            string fileName = GetSafeFileName(imageUri);
            if (fileName.Length == 0)
            {
                // URL ends with '/', so there is no name to save the image under.
                return 0;
            }

            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(imageUri);
                request.UserAgent = "Mozilla/6.0 (MSIE 6.0; Windows NT 5.1; Natas.Robot)";
                request.Timeout = DownloadTimeoutMs;

                using (WebResponse response = request.GetResponse())
                {
                    string contentType = response.ContentType;
                    if (contentType == null ||
                        !contentType.StartsWith("image/", StringComparison.OrdinalIgnoreCase))
                    {
                        return 0;
                    }

                    Directory.CreateDirectory(path);

                    using (Stream body = response.GetResponseStream())
                    using (FileStream output = new FileStream(Path.Combine(path, fileName), FileMode.Create))
                    {
                        // Read until end of stream instead of trusting ContentLength,
                        // which is -1 for chunked responses and may overstate a
                        // truncated download.
                        byte[] buffer = new byte[CopyBufferSize];
                        int read;
                        while ((read = body.Read(buffer, 0, buffer.Length)) > 0)
                        {
                            output.Write(buffer, 0, read);
                        }
                    }

                    return 1;
                }
            }
            catch (WebException)
            {
                return 0;
            }
            catch (UriFormatException)
            {
                return 0;
            }
            catch (IOException)
            {
                // Connection dropped mid-download or the file could not be written.
                return 0;
            }
        }

        // Parses url and accepts it only when it is an absolute http/https address.
        private static bool TryGetHttpUri(string url, out Uri uri)
        {
            return Uri.TryCreate(url, UriKind.Absolute, out uri)
                && (uri.Scheme == Uri.UriSchemeHttp || uri.Scheme == Uri.UriSchemeHttps);
        }

        // Returns the last path segment of uri (query string excluded) with
        // characters that are not valid in a file name replaced by '_'.
        private static string GetSafeFileName(Uri uri)
        {
            string urlPath = uri.AbsolutePath;
            string name = Uri.UnescapeDataString(urlPath.Substring(urlPath.LastIndexOf('/') + 1));

            foreach (char invalid in Path.GetInvalidFileNameChars())
            {
                name = name.Replace(invalid, '_');
            }

            return name;
        }
    }
}
调用方法
// Collect every image on the page into E:/XXX.
GatherPic g = new GatherPic("http://www.baidu.com", "E:/XXX");
g.start();
=====================================================
在web项目中使用WebBrowser类-----给网站抓图
最近做一个WEB项目,其中要求有个功能就是程序能网页抓图,举个例子: 在test.aspx页面上放一个TextBox和一个Button,TextBox用来输入要抓取的网页地址,然后按了Button之后,服务器要对前面输入的网址进行抓图,然后显示出来。我把抓图的业务逻辑做成一个类:
using System;
using System.Data;
using System.Windows.Forms;
using System.Drawing;

/// <summary>
/// WebSnap2: renders a web page in a <see cref="WebBrowser"/> control and
/// captures it as a bitmap.
/// </summary>
/// <remarks>
/// NOTE(review): <see cref="WebBrowser"/> wraps an ActiveX control and must be
/// created on a single-threaded-apartment (STA) thread. ASP.NET request threads
/// are MTA by default, so the caller presumably has to run this on a dedicated
/// STA thread - confirm.
/// </remarks>
public class WebSnap2
{
    public WebSnap2()
    {
    }

    /// <summary>
    /// Loads the page and returns a snapshot of it.
    /// </summary>
    /// <param name="Url">Address of the page to capture.</param>
    /// <returns>A bitmap of the rendered page; the caller owns and must dispose it.</returns>
    public Bitmap StartSnap(string Url)
    {
        WebBrowser myWB = this.GetPage(Url);
        try
        {
            return this.SnapWeb(myWB);
        }
        finally
        {
            // Release the browser even when the capture throws.
            myWB.Dispose();
        }
    }

    // Navigates a new browser to Url and pumps messages until loading completes.
    // NOTE(review): there is no timeout; a page that never reaches
    // ReadyState.Complete keeps this loop spinning.
    private WebBrowser GetPage(string Url)
    {
        WebBrowser myWB = new WebBrowser();
        try
        {
            myWB.ScrollBarsEnabled = false;
            myWB.Navigate(Url);
            while (myWB.ReadyState != WebBrowserReadyState.Complete)
            {
                System.Windows.Forms.Application.DoEvents();
            }

            return myWB;
        }
        catch
        {
            // Do not leak the control when navigation fails.
            myWB.Dispose();
            throw;
        }
    }

    // Resizes the browser to the document's full scroll size (plus a 10px margin)
    // and draws it into a new bitmap.
    private Bitmap SnapWeb(WebBrowser wb)
    {
        HtmlDocument hd = wb.Document;
        if (hd == null || hd.Body == null)
        {
            throw new InvalidOperationException("The loaded page has no document body to capture.");
        }

        int height = Convert.ToInt32(hd.Body.GetAttribute("scrollHeight")) + 10;
        int width = Convert.ToInt32(hd.Body.GetAttribute("scrollWidth")) + 10;
        wb.Height = height;
        wb.Width = width;

        Bitmap bmp = new Bitmap(width, height);
        try
        {
            wb.DrawToBitmap(bmp, new Rectangle(0, 0, width, height));
            return bmp;
        }
        catch
        {
            bmp.Dispose();
            throw;
        }
    }
}
然后在test.aspx的button_click事件里面调用:
// Capture the page whose address was typed into TextBox1 and send it back as a JPEG.
WebSnap2 ws = new WebSnap2();
using (Bitmap bmp = ws.StartSnap(TextBox1.Text))
using (System.IO.MemoryStream ms = new System.IO.MemoryStream())
{
    bmp.Save(ms, System.Drawing.Imaging.ImageFormat.Jpeg);
    Response.ContentType = "image/jpeg";
    // ToArray() returns only the bytes written; GetBuffer() would also send the
    // unused tail of the stream's internal buffer.
    Response.BinaryWrite(ms.ToArray());
}
以上是关于C# 采集的主要内容,如果未能解决你的问题,请参考以下文章