C#使用tesseract3.02识别验证码模拟登录
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了C#使用tesseract3.02识别验证码模拟登录相关的知识,希望对你有一定的参考价值。
一、前言
本文使用 tesseract 3.02 识别网站登录页面上的验证码,并模拟登录。安装 tesseract 3.02 的方法:在 Visual Studio 的 NuGet 包管理器中搜索 Tesseract 并安装即可。
二、项目结构图
三、项目主要代码
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.Drawing;
using System.IO;
using System.Net;
using System.Net.Cache;
using System.Text;

namespace Tesseract.Test.Tools
{
    /// <summary>
    /// HTTP helper that issues GET/POST requests (asynchronously or synchronously) with a
    /// shared cookie container, returns text responses, and saves other responses to disk.
    /// </summary>
    public class HttpHelper
    {
        /// <summary>
        /// Creates a helper with an empty cookie container and UTF-8 as the default text encoding.
        /// </summary>
        public HttpHelper()
        {
            CookieContainer = new CookieContainer();
            Encoding = Encoding.UTF8;
        }

        /// <summary>
        /// Remaining attempts per request URL, keyed by the URL string passed to the request methods.
        /// </summary>
        private ConcurrentDictionary<String, int> urlTryList = new ConcurrentDictionary<string, int>();

        /// <summary>
        /// Cookie container attached to every request created by this helper.
        /// </summary>
        public CookieContainer CookieContainer { set; get; }

        /// <summary>
        /// Form data to send. When non-empty, requests are sent as
        /// application/x-www-form-urlencoded POSTs; otherwise the request method is left at its default.
        /// </summary>
        public String PostData { set; private get; }

        /// <summary>
        /// Encoding used for the POST body and as the fallback when decoding text responses.
        /// </summary>
        public Encoding Encoding { set; private get; }

        /// <summary>
        /// Full path of the file most recently saved by the asynchronous request path.
        /// </summary>
        public string CodePath { get; set; }

        /// <summary>
        /// Root directory under which downloaded files are stored (in a yyyyMMdd sub-directory).
        /// </summary>
        public String FileSavePath { set; private get; }

        /// <summary>
        /// Callback invoked after a successful request. The first argument is the response text
        /// or the relative name of the saved file; the second is the request URL.
        /// </summary>
        public Action<String, String> CallBackAction;


        /// <summary>
        /// Starts an asynchronous request. The outcome is delivered through <see cref="CallBackAction"/>.
        /// </summary>
        /// <param name="url">Request URL.</param>
        /// <param name="tryTimes">Number of attempts allowed when the request fails with a WebException.</param>
        public void AsynRequest(String url, int tryTimes = 3)
        {
            Trace.TraceInformation(String.Concat("开始异步请求:", url));
            // TryAdd keeps an existing counter, so a retry does not reset the remaining attempts.
            urlTryList.TryAdd(url, tryTimes);
            var request = WebRequest.Create(url) as HttpWebRequest;
            if (request == null) return;

            ApplyCommonSettings(request);
            request.UserAgent =
                "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (Khtml, like Gecko) Chrome/34.0.1847.116 Safari/537.36";

            if (!String.IsNullOrEmpty(PostData))
            {
                request.ContentType = "application/x-www-form-urlencoded";
                request.Method = "POST";
                request.BeginGetRequestStream(GetRequestStreamCallback, request);
            }
            else
            {
                request.AllowWriteStreamBuffering = false;
                request.BeginGetResponse(GetResponseCallback, request);
            }
        }

        /// <summary>
        /// Completes the asynchronous request-stream acquisition, writes the POST body,
        /// then starts waiting for the response.
        /// </summary>
        /// <param name="ar">Async result whose state is the pending <see cref="HttpWebRequest"/>.</param>
        private void GetRequestStreamCallback(IAsyncResult ar)
        {
            var request = ar.AsyncState as HttpWebRequest;
            if (request == null) return;

            // "using" guarantees the stream is closed even if the write throws.
            using (var postStream = request.EndGetRequestStream(ar))
            {
                WritePostData(postStream);
            }
            request.BeginGetResponse(GetResponseCallback, request);
        }

        /// <summary>
        /// Completes the asynchronous request: reads the response as text or saves it as a
        /// .jpg file, notifies <see cref="CallBackAction"/>, and retries on WebException
        /// while attempts remain.
        /// </summary>
        /// <param name="ar">Async result whose state is the pending <see cref="HttpWebRequest"/>.</param>
        private void GetResponseCallback(IAsyncResult ar)
        {
            var request = ar.AsyncState as HttpWebRequest;
            if (request == null) return;
            try
            {
                using (var response = request.EndGetResponse(ar) as HttpWebResponse)
                {
                    if (response == null) return;

                    if (response.StatusCode != HttpStatusCode.OK)
                    {
                        Trace.TraceError(String.Concat("请求地址:", request.RequestUri, " 失败,HttpStatusCode",
                            response.StatusCode));
                        return;
                    }

                    using (var streamResponse = response.GetResponseStream())
                    {
                        if (streamResponse == null) return;

                        // NOTE(review): the text branch runs when IsText(...) returns FALSE. IsText is
                        // defined outside the visible listing, so the condition is kept exactly as
                        // published -- confirm it is not inverted.
                        if (!IsText(response.ContentType))
                        {
                            var text = ReadText(streamResponse, response);
                            NotifyCallback(text, request.RequestUri);
                        }
                        else
                        {
                            var fileName = BuildDatedFileName(".jpg");
                            CodePath = Path.Combine(FileSavePath, fileName);
                            SaveBody(streamResponse, fileName);
                            NotifyCallback(fileName, request.RequestUri);
                        }
                    }
                }
            }
            catch (WebException ex)
            {
                Trace.TraceError(String.Concat("请求地址:", request.RequestUri, " 失败信息:", ex.Message));
                // OriginalString is the exact string the request was created from, i.e. the
                // dictionary key; RequestUri.ToString() may be normalised differently.
                var toUrl = request.RequestUri.OriginalString;
                if (TryConsumeRetry(toUrl))
                    AsynRequest(toUrl);
            }
            finally
            {
                request.Abort();
            }
        }


        /// <summary>
        /// Performs a blocking request.
        /// </summary>
        /// <param name="url">Request URL.</param>
        /// <param name="tryTimes">Number of attempts allowed when the request fails with a WebException.</param>
        /// <returns>
        /// The response text, or the relative name of the saved file; <see cref="String.Empty"/>
        /// when the request could not be created, the status is not 200, or all attempts failed.
        /// </returns>
        public String SyncRequest(String url, int tryTimes = 3)
        {
            Trace.TraceInformation(String.Concat("开始同步请求:", url));
            // TryAdd keeps an existing counter, so a retry does not reset the remaining attempts.
            urlTryList.TryAdd(url, tryTimes);
            var request = WebRequest.Create(url) as HttpWebRequest;
            if (request == null) return String.Empty;

            ApplyCommonSettings(request);
            request.UserAgent =
                "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.116 Safari/537.36";
            request.CachePolicy = new RequestCachePolicy(RequestCacheLevel.NoCacheNoStore);
            request.AllowAutoRedirect = true;

            if (!String.IsNullOrEmpty(PostData))
            {
                request.ContentType = "application/x-www-form-urlencoded";
                request.Method = "POST";
                using (var postStream = request.GetRequestStream())
                {
                    WritePostData(postStream);
                }
            }
            else
            {
                request.AllowWriteStreamBuffering = false;
            }

            try
            {
                using (var response = request.GetResponse() as HttpWebResponse)
                {
                    if (response != null)
                    {
                        if (response.StatusCode != HttpStatusCode.OK)
                        {
                            Trace.TraceError(String.Concat("请求地址:", request.RequestUri, " 失败,HttpStatusCode",
                                response.StatusCode));
                            return String.Empty;
                        }

                        using (var streamResponse = response.GetResponseStream())
                        {
                            if (streamResponse != null)
                            {
                                // NOTE(review): same IsText polarity question as in GetResponseCallback.
                                if (!IsText(response.ContentType))
                                {
                                    var text = ReadText(streamResponse, response);
                                    NotifyCallback(text, request.RequestUri);
                                    return text;
                                }

                                // AbsolutePath excludes the query string, which would otherwise end up
                                // in the extension and make the file name invalid.
                                var fileName = BuildDatedFileName(Path.GetExtension(request.RequestUri.AbsolutePath));
                                SaveBody(streamResponse, fileName);
                                NotifyCallback(fileName, request.RequestUri);
                                return fileName;
                            }
                        }
                    }
                }
            }
            catch (WebException ex)
            {
                Trace.TraceError(String.Concat("请求地址:", request.RequestUri, " 失败信息:", ex.Message));
                var toUrl = request.RequestUri.OriginalString;
                if (TryConsumeRetry(toUrl))
                    return SyncRequest(toUrl); // propagate the retry's result to the caller

                Trace.TraceError(String.Concat("请求地址重试失败:", request.RequestUri));
                return String.Empty;
            }
            finally
            {
                request.Abort();
            }
            return String.Empty;
        }

        /// <summary>
        /// Applies the request settings shared by the request methods (headers, decompression,
        /// credentials, protocol version, timeout and cookies).
        /// </summary>
        /// <param name="request">Request to configure.</param>
        private void ApplyCommonSettings(HttpWebRequest request)
        {
            request.Headers.Add("Accept-Encoding", "gzip,deflate,sdch");
            request.Headers.Add("Accept-Language", "zh-CN,zh;q=0.8");
            request.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate |
                                             DecompressionMethods.None;
            request.Credentials = CredentialCache.DefaultNetworkCredentials;
            request.UseDefaultCredentials = false;
            request.KeepAlive = false;
            request.PreAuthenticate = false;
            request.ProtocolVersion = HttpVersion.Version10;
            request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";
            request.Timeout = 1000 * 60 * 3; // three minutes, in milliseconds
            request.CookieContainer = CookieContainer;
        }

        /// <summary>
        /// Encodes <see cref="PostData"/> with <see cref="Encoding"/> and writes it to the request stream.
        /// </summary>
        /// <param name="postStream">Request stream to write to.</param>
        private void WritePostData(Stream postStream)
        {
            var payload = Encoding.GetBytes(PostData ?? String.Empty);
            // Write the encoded BYTE count. The character count (PostData.Length) is smaller
            // whenever the data contains multi-byte characters and would truncate the body.
            postStream.Write(payload, 0, payload.Length);
        }

        /// <summary>
        /// Picks the encoding for a text response: the charset declared in the Content-Type
        /// header when present and recognised, otherwise the configured <see cref="Encoding"/>.
        /// </summary>
        /// <param name="response">Response whose Content-Type header is inspected.</param>
        /// <returns>The encoding to decode the response body with.</returns>
        private Encoding ResolveTextEncoding(HttpWebResponse response)
        {
            // The Content-Encoding header (response.ContentEncoding) names a compression scheme,
            // not a character set, so it must not be passed to Encoding.GetEncoding.
            const String marker = "charset=";
            var contentType = response.ContentType ?? String.Empty;
            var index = contentType.IndexOf(marker, StringComparison.OrdinalIgnoreCase);
            if (index >= 0)
            {
                var charset = contentType.Substring(index + marker.Length)
                    .Split(';')[0]
                    .Trim()
                    .Trim('"', '\'');
                try
                {
                    return Encoding.GetEncoding(charset);
                }
                catch (ArgumentException)
                {
                    // Unknown or empty charset name: fall back to the configured encoding.
                }
            }
            return Encoding;
        }

        /// <summary>
        /// Reads the whole response body as text.
        /// </summary>
        /// <param name="body">Response stream.</param>
        /// <param name="response">Response used to determine the text encoding.</param>
        /// <returns>The decoded body.</returns>
        private String ReadText(Stream body, HttpWebResponse response)
        {
            using (var reader = new StreamReader(body, ResolveTextEncoding(response)))
            {
                return reader.ReadToEnd();
            }
        }

        /// <summary>
        /// Builds a relative file name of the form yyyyMMdd/yyyyMMddHHmmssffff + extension.
        /// </summary>
        /// <param name="extension">File extension including the leading dot (may be empty).</param>
        /// <returns>The relative file name.</returns>
        private static String BuildDatedFileName(String extension)
        {
            // Sample the clock once so the folder and the file name cannot disagree across midnight.
            var now = DateTime.Now;
            return String.Concat(now.ToString("yyyyMMdd"), "/", now.ToString("yyyyMMddHHmmssffff"), extension);
        }

        /// <summary>
        /// Writes the response body to <see cref="FileSavePath"/>/<paramref name="fileName"/>,
        /// creating the target directory when needed.
        /// </summary>
        /// <param name="body">Response stream.</param>
        /// <param name="fileName">Relative file name produced by <see cref="BuildDatedFileName"/>.</param>
        private void SaveBody(Stream body, String fileName)
        {
            var fullPath = Path.Combine(FileSavePath, fileName);
            var directory = Path.GetDirectoryName(fullPath);
            if (!String.IsNullOrEmpty(directory) && !Directory.Exists(directory))
                Directory.CreateDirectory(directory);

            using (var fileStream = new FileStream(fullPath, FileMode.Create))
            {
                body.CopyTo(fileStream);
            }
        }

        /// <summary>
        /// Invokes <see cref="CallBackAction"/> on a thread-pool thread when a callback is set
        /// and the result is non-empty.
        /// </summary>
        /// <param name="result">Response text or relative file name.</param>
        /// <param name="requestUri">URI of the completed request.</param>
        private void NotifyCallback(String result, Uri requestUri)
        {
            var callback = CallBackAction;
            if (callback != null && !String.IsNullOrEmpty(result))
                callback.BeginInvoke(result, requestUri.ToString(), (s) => { }, null);
        }

        /// <summary>
        /// Consumes one attempt for <paramref name="url"/>.
        /// </summary>
        /// <param name="url">Key under which the attempt counter was registered.</param>
        /// <returns>True when another attempt is allowed; false when none remain or the URL is not tracked.</returns>
        private bool TryConsumeRetry(String url)
        {
            int remaining;
            if (!urlTryList.TryGetValue(url, out remaining)) return false;

            // TryUpdate takes (key, newValue, comparisonValue): store remaining - 1 only if the
            // counter still equals the value just read.
            urlTryList.TryUpdate(url, remaining - 1, remaining);
            if (remaining - 1 <= 0)
            {
                urlTryList.TryRemove(url, out remaining);
                return false;
            }
            return true;
        }

        // ------------------------------------------------------------------------------------
        // NOTE: the published listing is cut off part-way through GetCheckCode, and the IsText
        // helper called above is also outside the visible listing. The visible part of
        // GetCheckCode is reproduced below as a commented fragment (with the POST byte-count fix
        // applied); the remainder must be restored from the original project.
        //
        // /// <summary>
        // /// Fetches a verification-code (captcha) image.
        // /// </summary>
        // /// <param name="url">Request URL.</param>
        // /// <param name="tryTimes">Number of attempts allowed on failure.</param>
        // public Bitmap GetCheckCode(String url, int tryTimes = 3)
        // {
        //     Trace.TraceInformation(String.Concat("开始同步请求:", url));
        //     urlTryList.TryAdd(url, tryTimes);
        //     var request = WebRequest.Create(url) as HttpWebRequest;
        //     if (request == null) return null;
        //     request.Headers.Add("Accept-Encoding", "gzip,deflate,sdch");
        //     request.Headers.Add("Accept-Language", "zh-CN,zh;q=0.8");
        //     request.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate |
        //                                      DecompressionMethods.None;
        //     request.Credentials = CredentialCache.DefaultNetworkCredentials;
        //     request.UseDefaultCredentials = false;
        //     request.KeepAlive = false;
        //     request.PreAuthenticate = false;
        //     request.ProtocolVersion = HttpVersion.Version10;
        //     request.UserAgent =
        //         "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.116 Safari/537.36";
        //     request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";
        //     request.CachePolicy = new RequestCachePolicy(RequestCacheLevel.NoCacheNoStore);
        //     request.Timeout = 1000*60*3;
        //     request.CookieContainer = CookieContainer;
        //     request.AllowAutoRedirect = true;
        //
        //     if (!String.IsNullOrEmpty(PostData))
        //     {
        //         request.ContentType = "application/x-www-form-urlencoded";
        //         request.Method = "POST";
        //         using (var postStream = request.GetRequestStream())
        //         {
        //             var byteArray = Encoding.GetBytes(PostData);
        //             postStream.Write(byteArray, 0, byteArray.Length);
        //         }
        //     }
        //     else
        //     {
        //         request.AllowWriteStreamBuffering = false;
        //     }
        //     try
        //     {
        //         using (var response = request.GetResponse() as HttpWebResponse)
        //         {
        //             ... (listing truncated here in the source)
        // ------------------------------------------------------------------------------------
    }
}
以上是关于C#使用tesseract3.02识别验证码模拟登录的主要内容,如果未能解决你的问题,请参考以下文章