京东手机销售价格抓取
Posted 凡是过往,皆为序章。
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了京东手机销售价格抓取相关的知识,希望对你有一定的参考价值。
/// <summary> /// 京东手机销售价格抓取 /// </summary> private static void GetPhonePrice ( object querry) { var querry1=querry as Tuple<int,int>; //HttpHelper helper=new HttpHelper(); HttpItem item=new HttpItem() { Method="GET" }; List<PhonePrice> list=new List<PhonePrice>(); for ( var i = querry1.Item1 ; i < querry1.Item2; i++ ) { item.URL = "https://list.jd.com/list.html?cat=9987,653,655&page=" + i + "&sort=sort_rank_asc&trans=1&JL=6_0_0#J_main"; var result=helper.GetHtml(item).Html; HtmlDocument doc=new HtmlDocument(); doc.LoadHtml ( result ); var info=doc.DocumentNode.SelectNodes("//div[@class=‘gl-i-wrap j-sku-item‘]"); List<PhonePrice> phoneList=info.Select(p=>new PhonePrice { PhoneName =p.InnerText.Replace(" ","").Replace("<","").Replace(">","").Replace("关注","").Replace("\n",""), SkuId=p.Attributes["data-sku"].Value } ).ToList(); var skuList=info.Select(p=>p.Attributes["data-sku"].Value).ToList(); string searchSku="https://p.3.cn/prices/mgets?callback=jQuery1214924&ext=11000000&pin=&type=1&area=22_1930_4284_0&skuIds="; for ( var j = 0 ; j < skuList.Count ( ) ; j++ ) { if ( j != skuList.Count ( ) - 1 ) searchSku += "J_" + skuList [ j ] + ","; else searchSku += "J_" + skuList [ j ] + "&pdbp=0&pdtk=&pdpin=&pduid=1523417466534118232925&source=list_pc_front&_=1523501043677"; } item.URL = searchSku; var result1=helper.GetHtml(item).Html.Replace("jQuery1214924(","").Replace(");",""); var priceJArray=JsonConvert.DeserializeObject<JArray>(result1); for ( var k = 0 ; k < priceJArray.Count ; k++ ) { var sku_id=priceJArray[k]["id"].ToString().Replace("J_",""); if ( sku_id == phoneList [ k ].SkuId ) { phoneList [ k ].Price = priceJArray [ k ] [ "p" ].ToString ( ); } } list.AddRange ( phoneList ); using ( con ) { TSqlBulkInsert ( "INSERT INTO jd_sale_price" , list ); } //foreach ( var item3 in phoneList ) //{ // Console.WriteLine ( "名称:{0},价格:{1}" , item3.PhoneName , item3.Price ); //} } //return list; } /// <summary> /// 批量数据写入 /// </summary> /// <typeparam name="T"></typeparam> /// <param name="sql"></param> /// <param name="dataList"></param> /// <returns></returns> public static bool TSqlBulkInsert<T> ( string sql , List<T> dataList ) where T : new() { bool result = false; //获取T的公共属性 Type type = dataList[0].GetType(); PropertyInfo[] param = type.GetProperties(); List<string> properotyList = param.Select(p => p.Name).ToList(); using ( con ) { con.Open ( ); sql = CreateSqlStr ( sql , dataList , properotyList ); mysqlTransaction tran = (MySqlTransaction)con.BeginTransaction(); MySqlCommand commd = new MySqlCommand(sql,(MySqlConnection) con, tran); try { int query = commd.ExecuteNonQuery(); tran.Commit ( ); result = true; } catch ( Exception e ) { tran.Rollback ( ); throw; } return result; } } /// <summary> /// 构建大批量写入数据SQL语句 /// </summary> /// <typeparam name="T"></typeparam> /// <param name="sql">原始SQL</param> /// <param name="dataList">写入数据</param> /// <param name="properotyList"></param> /// <returns></returns> private static string CreateSqlStr<T> ( string sql , List<T> dataList , List<string> properotyList ) where T : new() { StringBuilder sb = new StringBuilder(); sb.Append ( sql ); sb.Append ( " VALUES" ); int i = 0; foreach ( var item in dataList ) { sb.Append ( "(" ); for ( int j = 0 ; j < properotyList.Count ; j++ ) { PropertyInfo properotyInfo = item.GetType().GetProperty(properotyList[j]); // 属性的信息 object properotyValue = properotyInfo.GetValue(item,null);// 属性的值 sb.Append ( "\"" ); sb.Append ( properotyValue.ToString ( ) ); sb.Append ( "\"" ); if ( j < properotyList.Count - 1 ) { sb.Append ( "," ); } } sb.Append ( ")" ); if ( i++ < dataList.Count - 1 ) { sb.Append ( "," ); } } sql = sb.ToString ( ); return sql; } /// <summary> /// 京东手机评级数据抓取 /// </summary> private static void GetComment ( ) { HttpItem item = new HttpItem() { URL = "https://sclub.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98vv1957&productId=6494554&score=0&sortType=5&page=99&pageSize=20&isShadowSku=0&fold=1" , Method = "GET" , }; List<Comment> list=new List<Comment>(); for ( var i = 0 ; i < 100 ; i++ ) { item.URL = "https://sclub.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98vv1957&productId=6494554&score=0&sortType=5&page=" + i + "&pageSize=20&isShadowSku=0&fold=1"; var result=helper.GetHtml(item).Html; result = result.Replace ( "fetchJSON_comment98vv1957(" , "" ).Replace ( ");" , "" ); var jobject=JsonConvert.DeserializeObject<JObject>(result).Value<JToken>("comments"); foreach ( var info in jobject ) { Comment comment=new Comment(); comment.userClientShow = info.SelectToken ( "userClientShow" ) == null ? "" : info.SelectToken ( "userClientShow" ).ToString ( ).Replace ( "来自" , "" ).Replace ( "客户端" , "" ).Replace ( "京东" , "" ).Replace ( "购物" , "" ); comment.nickname = info.SelectToken ( "nickname" ) == null ? "" : info.SelectToken ( "nickname" ).ToString ( ); comment.userExpValue = info.SelectToken ( "userExpValue" ) == null ? "" : info.SelectToken ( "userExpValue" ).ToString ( ); comment.userLevelName = info.SelectToken ( "userLevelName" ) == null ? "" : info.SelectToken ( "userLevelName" ).ToString ( ); comment.referenceTime = info.SelectToken ( "referenceTime" ) == null ? "" : info.SelectToken ( "referenceTime" ).ToString ( ); comment.referenceName = info.SelectToken ( "referenceName" ) == null ? "" : info.SelectToken ( "referenceName" ).ToString ( ); comment.productColor = info.SelectToken ( "productColor" ) == null ? "" : info.SelectToken ( "productColor" ).ToString ( ); comment.productSize = info.SelectToken ( "productSize" ) == null ? "" : info.SelectToken ( "productSize" ).ToString ( ); comment.score = info.SelectToken ( "score" ) == null ? "" : info.SelectToken ( "score" ).ToString ( ); comment.content = info.SelectToken ( "content" ) == null ? "" : info.SelectToken ( "content" ).ToString ( ); comment.creationTime = info.SelectToken ( "creationTime" ) == null ? "" : info.SelectToken ( "creationTime" ).ToString ( ); list.Add ( comment ); } } NPOI.SS.UserModel.IWorkbook workbook=new NPOI.HSSF.UserModel.HSSFWorkbook(); NPOI.SS.UserModel.ISheet sheet=workbook.CreateSheet("京东vivo评价数据"); for ( var i = 0 ; i < list.Count ; i++ ) { NPOI.SS.UserModel.IRow row = sheet.CreateRow ( i + 1 ); if ( i == 0 ) { row.CreateCell ( 1 ).SetCellValue ( "userClientShow" ); row.CreateCell ( 2 ).SetCellValue ( "nickname" ); row.CreateCell ( 3 ).SetCellValue ( "userExpValue" ); row.CreateCell ( 4 ).SetCellValue ( "userLevelName" ); row.CreateCell ( 5 ).SetCellValue ( "referenceTime" ); row.CreateCell ( 6 ).SetCellValue ( "referenceName" ); row.CreateCell ( 7 ).SetCellValue ( "productColor" ); row.CreateCell ( 8 ).SetCellValue ( "productSize" ); row.CreateCell ( 9 ).SetCellValue ( "score" ); row.CreateCell ( 10 ).SetCellValue ( "content" ); row.CreateCell ( 11 ).SetCellValue ( "creationTime" ); } else { row.CreateCell ( 1 ).SetCellValue ( list [ i ].userClientShow ); row.CreateCell ( 2 ).SetCellValue ( list [ i ].nickname ); row.CreateCell ( 3 ).SetCellValue ( list [ i ].userExpValue ); row.CreateCell ( 4 ).SetCellValue ( list [ i ].userLevelName ); row.CreateCell ( 5 ).SetCellValue ( list [ i ].referenceTime ); row.CreateCell ( 6 ).SetCellValue ( list [ i ].referenceName ); row.CreateCell ( 7 ).SetCellValue ( list [ i ].productColor ); row.CreateCell ( 8 ).SetCellValue ( list [ i ].productSize ); row.CreateCell ( 9 ).SetCellValue ( list [ i ].score ); row.CreateCell ( 10 ).SetCellValue ( list [ i ].content ); row.CreateCell ( 11 ).SetCellValue ( list [ i ].creationTime ); } } var path=AppDomain.CurrentDomain.BaseDirectory+"test.xls"; using ( System.IO.FileStream file = new System.IO.FileStream ( path , System.IO.FileMode.Create ) ) { workbook.Write ( file ); file.Close ( ); } } public static int Test(int x) { return x; } } public class PhonePrice { public string PhoneName { get; set; } public string Price { get; set; } public string SkuId { get; set; } } public class Comment { public string userClientShow { get; set; } public string nickname { get; set; } public string userExpValue { get; set; } public string userLevelName { get; set; } public string referenceTime { get; set; } public string referenceName { get; set; } public string productColor { get; set; } public string productSize { get; set; } public string score { get; set; } public string content { get; set; } public string creationTime { get; set; } }
以上是关于京东手机销售价格抓取的主要内容,如果未能解决你的问题,请参考以下文章
Python爬虫编程思想(70): 项目实战--抓取京东商城手机销售排行榜