nodejs的爬虫

Posted 宝2333

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了nodejs的爬虫相关的知识,希望对你有一定的参考价值。

// Crawl a static page: fetch the HTML, extract each product with cheerio,
// download its image and insert one row per product into MySQL.
const fs = require('fs');
const path = require('path');
const request = require('request');
const cheerio = require('cheerio');
const mysql = require('mysql');

const PAGE_URL = 'https://www.epet.com/cleargoodsmdog.html';
const DOWNLOAD_DIR = path.join(__dirname, 'downloadimg');

const conn = mysql.createConnection({
    host: 'localhost',
    user: 'root',
    password: 'root',
    port: '3306',
    database: 'xiaomi'
});
// No explicit conn.connect(): the mysql driver connects implicitly on the
// first query.

/**
 * Download one image into DOWNLOAD_DIR, named after the last path segment
 * of its URL. Failures are logged and do not abort the crawl.
 *
 * @param {string} imgsrc - Image URL; resolved against PAGE_URL so relative
 *   and protocol-relative values also work.
 * @returns {void}
 */
function downloadImage(imgsrc) {
    let imageUrl;
    try {
        imageUrl = new URL(imgsrc, PAGE_URL);
    } catch (urlErr) {
        console.error('Skipping invalid image URL:', imgsrc);
        return;
    }
    // Use the URL's pathname so a query string never ends up in the file name.
    const target = path.join(DOWNLOAD_DIR, path.basename(imageUrl.pathname));
    request(imageUrl.href)
        .on('error', function(downloadErr) {
            console.error('Image download failed:', imageUrl.href, downloadErr.message);
        })
        .pipe(fs.createWriteStream(target))
        .on('error', function(writeErr) {
            console.error('Could not write image:', target, writeErr.message);
        });
}

request(PAGE_URL, function(err, res, body) {
    // Without this guard a network failure would pass undefined to cheerio.
    if (err) {
        console.error('Request failed:', err.message);
        return;
    }
    if (res.statusCode !== 200) {
        console.error('Unexpected HTTP status:', res.statusCode);
        return;
    }

    // The write streams below need the target directory to exist.
    fs.mkdirSync(DOWNLOAD_DIR, { recursive: true });

    const $ = cheerio.load(body);
    const list = $('.qcGoodsBox.bgwhite .fl.rela');
    console.log(list);
    list.each(function(index) {
        const pic = $(this).find('.cloud-zoom img').attr('src0');
        const title = $(this).find('.qcGoodsTit a').text();
        const price = $(this).find('.qcPriceBox .ft20').text();
        const yprice = $(this).find('.qcPriceBox .ft12').text();

        // The image URL is only known per item, so the download happens here.
        if (pic) {
            downloadImage(pic);
        }

        // NOTE(review): yprice is stored in the goodsclass column, as in the
        // original code - confirm this mapping is intended.
        conn.query('insert into goods(goodsname,price,pic,goodsclass) values(?,?,?,?)', [title, price, pic, yprice], function(queryErr, results, fields) {
            if (queryErr) {
                console.error('Insert failed for "' + title + '":', queryErr.message);
                return;
            }
            console.log(results);
        });
    });
    // end() lets the queries queued above finish before the connection closes.
    conn.end();
});
// Crawl dynamically rendered data: phantom loads the page in a headless
// browser so scripts run, then cheerio parses the rendered HTML.
// NOTE(review): `cheerio` is also declared by the static-page crawler; this
// snippet presumably lives in its own file - confirm before merging the two.
const phantom = require('phantom');
const cheerio = require('cheerio');

(async function() {
  const instance = await phantom.create();
  try {
    const page = await instance.createPage();
    // Log every resource the page requests while it loads.
    await page.on('onResourceRequested', function(requestData) {
      console.info('Requesting', requestData.url);
    });

    const status = await page.open('http://you.163.com/item/list?categoryId=1065000&subCategoryId=1065001');
    // page.open resolves with 'success' or 'fail'; do not parse a failed load.
    if (status !== 'success') {
      throw new Error(`Failed to open page, status: ${status}`);
    }

    const content = await page.property('content');
    const $ = cheerio.load(content);
    const list = $('.m-itemList.m-itemList-level2Category .item');
    list.each(function(index) {
      const title = $(this).find('.name a span:nth-of-type(3)').text();
      console.log(title);
    });
  } finally {
    // Always shut the phantom process down, even when a step above throws.
    await instance.exit();
  }
})().catch(function(err) {
  // Report the failure instead of leaving an unhandled promise rejection.
  console.error('Crawl failed:', err);
  process.exitCode = 1;
});

 

以上是关于nodejs的爬虫的主要内容,如果未能解决你的问题,请参考以下文章

scrapy按顺序启动多个爬虫代码片段(python3)

scrapy主动退出爬虫的代码片段(python3)

javascript 用于在节点#nodejs #javascript内设置react app的代码片段

NodeJS爬虫入门

nodejs实现一个简单的爬虫

nodeJS实现简易爬虫