京东手机销售价格抓取

Posted 凡是过往,皆为序章。

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了京东手机销售价格抓取相关的知识,希望对你有一定的参考价值。

   /// <summary>
        /// Scrapes JD.com mobile-phone listing pages and stores each product's name,
        /// SKU id and price in the <c>jd_sale_price</c> table, one bulk insert per page.
        /// </summary>
        /// <param name="querry">
        /// A <c>Tuple&lt;int,int&gt;</c>: Item1 is the first listing page to fetch (inclusive),
        /// Item2 is the page at which to stop (exclusive).
        /// </param>
        /// <exception cref="ArgumentException">
        /// Thrown when <paramref name="querry"/> is not a <c>Tuple&lt;int,int&gt;</c>.
        /// </exception>
        private static void GetPhonePrice ( object querry)
        {
            var pageRange = querry as Tuple<int,int>;
            if ( pageRange == null )
            {
                throw new ArgumentException ( "querry must be a Tuple<int,int> of (first page, end page)." , "querry" );
            }

            HttpItem item = new HttpItem()
            {
                Method = "GET"
            };

            for ( var page = pageRange.Item1 ; page < pageRange.Item2 ; page++ )
            {
                item.URL = "https://list.jd.com/list.html?cat=9987,653,655&page=" + page + "&sort=sort_rank_asc&trans=1&JL=6_0_0#J_main";
                var listingHtml = helper.GetHtml ( item ).Html;
                HtmlDocument doc = new HtmlDocument();
                doc.LoadHtml ( listingHtml );

                // The attribute value must be delimited by plain ASCII quotes; typographic
                // quotes are not valid XPath string delimiters.
                var productNodes = doc.DocumentNode.SelectNodes ( "//div[@class='gl-i-wrap j-sku-item']" );
                if ( productNodes == null )
                {
                    // SelectNodes yields null (not an empty collection) when nothing matches.
                    continue;
                }

                List<PhonePrice> phoneList = productNodes.Select ( node => new PhonePrice
                {
                    PhoneName = node.InnerText.Replace(" ","").Replace("<","").Replace(">","").Replace("关注","").Replace("\n",""),
                    SkuId = node.Attributes["data-sku"].Value
                } ).ToList ( );
                if ( phoneList.Count == 0 )
                {
                    continue;
                }

                // One price request for every SKU on the page: skuIds=J_<id>,J_<id>,...
                string searchSku = "https://p.3.cn/prices/mgets?callback=jQuery1214924&ext=11000000&pin=&type=1&area=22_1930_4284_0&skuIds="
                    + string.Join ( "," , phoneList.Select ( row => "J_" + row.SkuId ) )
                    + "&pdbp=0&pdtk=&pdpin=&pduid=1523417466534118232925&source=list_pc_front&_=1523501043677";
                item.URL = searchSku;

                // The endpoint answers with JSONP; strip the callback wrapper to get a bare JSON array.
                var priceJson = helper.GetHtml ( item ).Html.Replace ( "jQuery1214924(" , "" ).Replace ( ");" , "" );
                var priceJArray = JsonConvert.DeserializeObject<JArray> ( priceJson );

                // Match prices to products by SKU id instead of by position, so a missing,
                // extra or reordered entry in the response cannot misassign a price or
                // index past the end of either list.
                var priceBySku = new Dictionary<string , string> ( );
                if ( priceJArray != null )
                {
                    foreach ( var entry in priceJArray )
                    {
                        var idToken = entry [ "id" ];
                        var priceToken = entry [ "p" ];
                        if ( idToken == null || priceToken == null )
                        {
                            continue;
                        }
                        priceBySku [ idToken.ToString ( ).Replace ( "J_" , "" ) ] = priceToken.ToString ( );
                    }
                }

                foreach ( var phone in phoneList )
                {
                    string price;
                    // Fall back to an empty price so a product without a quote does not
                    // leave a null value for the SQL builder.
                    phone.Price = priceBySku.TryGetValue ( phone.SkuId , out price ) ? price : "";
                }

                // Insert only this page's rows; re-sending an accumulated list on every
                // iteration would write earlier pages again. TSqlBulkInsert opens and
                // disposes the connection itself, so no extra using block is needed here.
                TSqlBulkInsert ( "INSERT INTO jd_sale_price" , phoneList );
            }
        }
        /// <summary>
        /// Writes every element of <paramref name="dataList"/> with a single multi-row
        /// INSERT statement executed inside a transaction.
        /// </summary>
        /// <typeparam name="T">Row type; the public properties of the first element supply the values.</typeparam>
        /// <param name="sql">Leading part of the statement (for example <c>INSERT INTO table</c>); the VALUES clause is appended.</param>
        /// <param name="dataList">Rows to write.</param>
        /// <returns><c>true</c> when the transaction was committed; <c>false</c> when <paramref name="dataList"/> is empty and nothing was executed.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="dataList"/> is null.</exception>
        public static bool TSqlBulkInsert<T> ( string sql , List<T> dataList ) where T : new()
        {
            if ( dataList == null )
            {
                throw new ArgumentNullException ( "dataList" );
            }
            if ( dataList.Count == 0 )
            {
                // Nothing to insert; also avoids indexing into an empty list below.
                return false;
            }

            // Names of the public properties of the row type, in reflection order.
            Type type = dataList[0].GetType();
            List<string> properotyList = type.GetProperties ( ).Select ( p => p.Name ).ToList ( );

            // NOTE(review): con is declared outside this view and is disposed when this
            // block exits; confirm it can be reopened by the next call.
            using ( con )
            {
                con.Open ( );
                sql = CreateSqlStr ( sql , dataList , properotyList );

                using ( MySqlTransaction tran = (MySqlTransaction)con.BeginTransaction() )
                using ( MySqlCommand commd = new MySqlCommand(sql,(MySqlConnection) con, tran) )
                {
                    try
                    {
                        commd.ExecuteNonQuery ( );
                        tran.Commit ( );
                        return true;
                    }
                    catch
                    {
                        // Undo the partial write, then rethrow preserving the original stack trace.
                        tran.Rollback ( );
                        throw;
                    }
                }
            }
        }

        /// <summary>
        /// Builds a multi-row INSERT statement: <paramref name="sql"/> followed by
        /// <c> VALUES(...),(...)</c> with one parenthesised group per element.
        /// </summary>
        /// <typeparam name="T">Row type whose properties are read by reflection.</typeparam>
        /// <param name="sql">Leading part of the statement, e.g. <c>INSERT INTO table</c>.</param>
        /// <param name="dataList">Rows to serialise.</param>
        /// <param name="properotyList">Names of the properties to emit, in column order.</param>
        /// <returns>The complete SQL text.</returns>
        /// <remarks>
        /// Values are emitted as double-quoted string literals with backslashes and double
        /// quotes backslash-escaped; null values are emitted as <c>NULL</c>.
        /// NOTE(review): backslash escaping assumes the server is not running with
        /// NO_BACKSLASH_ESCAPES; a parameterised command would remove that assumption.
        /// </remarks>
        private static string CreateSqlStr<T> ( string sql , List<T> dataList , List<string> properotyList ) where T : new()
        {
            StringBuilder sb = new StringBuilder();
            sb.Append ( sql );
            sb.Append ( " VALUES" );
            for ( int rowIndex = 0 ; rowIndex < dataList.Count ; rowIndex++ )
            {
                T item = dataList[rowIndex];
                if ( rowIndex > 0 )
                {
                    sb.Append ( "," );
                }
                sb.Append ( "(" );
                for ( int j = 0 ; j < properotyList.Count ; j++ )
                {
                    if ( j > 0 )
                    {
                        sb.Append ( "," );
                    }
                    PropertyInfo properotyInfo = item.GetType().GetProperty(properotyList[j]);
                    object properotyValue = properotyInfo.GetValue(item,null);
                    if ( properotyValue == null )
                    {
                        // A null property becomes SQL NULL rather than a NullReferenceException.
                        sb.Append ( "NULL" );
                        continue;
                    }
                    sb.Append ( "\"" );
                    // Escape the backslash first so the backslashes added for quotes are not doubled.
                    sb.Append ( properotyValue.ToString ( ).Replace ( "\\" , "\\\\" ).Replace ( "\"" , "\\\"" ) );
                    sb.Append ( "\"" );
                }
                sb.Append ( ")" );
            }
            return sb.ToString ( );
        }


        /// <summary>
        /// Downloads pages 0-99 of the JD.com comment feed for product 6494554 and
        /// exports the collected comments to <c>test.xls</c> in the application's base directory.
        /// </summary>
        /// <remarks>
        /// Sheet layout: row 0 holds the column headers and each comment follows on its own
        /// row; columns start at index 1, leaving column 0 empty.
        /// </remarks>
        private static void GetComment ( )
        {
            HttpItem item = new HttpItem()
            {
                Method = "GET" ,
            };
            List<Comment> list=new List<Comment>();

            for ( var page = 0 ; page < 100 ; page++ )
            {
                item.URL = "https://sclub.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98vv1957&productId=6494554&score=0&sortType=5&page=" + page + "&pageSize=20&isShadowSku=0&fold=1";
                var result=helper.GetHtml(item).Html;
                // The endpoint answers with JSONP; strip the callback wrapper to get bare JSON.
                result = result.Replace ( "fetchJSON_comment98vv1957(" , "" ).Replace ( ");" , "" );

                var payload = JsonConvert.DeserializeObject<JObject> ( result );
                var comments = payload == null ? null : payload.Value<JToken> ( "comments" );
                if ( comments == null )
                {
                    // A page without a "comments" member contributes nothing; skip it
                    // instead of failing the whole export.
                    continue;
                }

                foreach ( var info in comments )
                {
                    Comment comment=new Comment();
                    // Reduce the client description to the bare client name.
                    comment.userClientShow = TokenText ( info , "userClientShow" ).Replace ( "来自" , "" ).Replace ( "客户端" , "" ).Replace ( "京东" , "" ).Replace ( "购物" , "" );
                    comment.nickname = TokenText ( info , "nickname" );
                    comment.userExpValue = TokenText ( info , "userExpValue" );
                    comment.userLevelName = TokenText ( info , "userLevelName" );
                    comment.referenceTime = TokenText ( info , "referenceTime" );
                    comment.referenceName = TokenText ( info , "referenceName" );
                    comment.productColor = TokenText ( info , "productColor" );
                    comment.productSize = TokenText ( info , "productSize" );
                    comment.score = TokenText ( info , "score" );
                    comment.content = TokenText ( info , "content" );
                    comment.creationTime = TokenText ( info , "creationTime" );
                    list.Add ( comment );
                }
            }

            NPOI.SS.UserModel.IWorkbook workbook=new NPOI.HSSF.UserModel.HSSFWorkbook();
            NPOI.SS.UserModel.ISheet sheet=workbook.CreateSheet("京东vivo评价数据");

            // The header gets its own row so that list[0] is written as data rather than
            // being replaced by the header.
            string[] headers =
            {
                "userClientShow" , "nickname" , "userExpValue" , "userLevelName" ,
                "referenceTime" , "referenceName" , "productColor" , "productSize" ,
                "score" , "content" , "creationTime"
            };
            NPOI.SS.UserModel.IRow headerRow = sheet.CreateRow ( 0 );
            for ( var column = 0 ; column < headers.Length ; column++ )
            {
                headerRow.CreateCell ( column + 1 ).SetCellValue ( headers [ column ] );
            }

            for ( var i = 0 ; i < list.Count ; i++ )
            {
                Comment entry = list [ i ];
                string[] values =
                {
                    entry.userClientShow , entry.nickname , entry.userExpValue , entry.userLevelName ,
                    entry.referenceTime , entry.referenceName , entry.productColor , entry.productSize ,
                    entry.score , entry.content , entry.creationTime
                };
                NPOI.SS.UserModel.IRow row = sheet.CreateRow ( i + 1 );
                for ( var column = 0 ; column < values.Length ; column++ )
                {
                    row.CreateCell ( column + 1 ).SetCellValue ( values [ column ] );
                }
            }

            var path=AppDomain.CurrentDomain.BaseDirectory+"test.xls";
            using ( System.IO.FileStream file = new System.IO.FileStream ( path , System.IO.FileMode.Create ) )
            {
                workbook.Write ( file );
            }
        }

        /// <summary>
        /// Returns the text of the token selected by <paramref name="name"/>, or an empty
        /// string when the token is absent.
        /// </summary>
        private static string TokenText ( JToken source , string name )
        {
            JToken token = source.SelectToken ( name );
            return token == null ? "" : token.ToString ( );
        }

        /// <summary>
        /// Returns the supplied value unchanged.
        /// </summary>
        /// <param name="x">Value to return.</param>
        /// <returns>The same value as <paramref name="x"/>.</returns>
        public static int Test(int x)
        {
            int echoed = x;
            return echoed;
        }
    }
    /// <summary>
    /// One scraped phone listing. TSqlBulkInsert reads this type's public properties by
    /// reflection to build the VALUES clause, so adding, renaming or reordering
    /// properties changes the generated SQL.
    /// </summary>
    public class PhonePrice
    {
        /// <summary>Product text taken from the listing node's inner text after whitespace and markers are stripped.</summary>
        public string PhoneName { get; set; }
        /// <summary>Price as text, taken from the "p" field of the price response.</summary>
        public string Price { get; set; }
        /// <summary>SKU id taken from the listing node's data-sku attribute.</summary>
        public string SkuId { get; set; }
    }
    /// <summary>
    /// One product comment. GetComment fills each property from the JSON token of the
    /// same name (empty string when the token is absent) and writes them to the
    /// spreadsheet in declaration order.
    /// </summary>
    public class Comment
    {
        /// <summary>Client description with the "来自", "客户端", "京东" and "购物" fragments removed.</summary>
        public string userClientShow { get; set; }
        /// <summary>Text of the "nickname" token.</summary>
        public string nickname { get; set; }
        /// <summary>Text of the "userExpValue" token.</summary>
        public string userExpValue { get; set; }
        /// <summary>Text of the "userLevelName" token.</summary>
        public string userLevelName { get; set; }

        /// <summary>Text of the "referenceTime" token.</summary>
        public string referenceTime { get; set; }
        /// <summary>Text of the "referenceName" token.</summary>
        public string referenceName { get; set; }
        /// <summary>Text of the "productColor" token.</summary>
        public string productColor { get; set; }
        /// <summary>Text of the "productSize" token.</summary>
        public string productSize { get; set; }

        /// <summary>Text of the "score" token.</summary>
        public string score { get; set; }
        /// <summary>Text of the "content" token.</summary>
        public string content { get; set; }
        /// <summary>Text of the "creationTime" token.</summary>
        public string creationTime { get; set; }
    }

 

以上是关于京东手机销售价格抓取的主要内容,如果未能解决你的问题,请参考以下文章:

京东商品价格抓取

京东商品价格抓取

Python爬虫编程思想(70): 项目实战--抓取京东商城手机销售排行榜

Python爬虫编程思想(70): 项目实战--抓取京东商城手机销售排行榜

python爬虫获取天猫与京东的商品价格

如何爬取京东手机上万的商品数据,这个神器可以帮你