nodejs的爬虫
Posted 宝2333
tags:
篇首语：本文由小常识网（cha138.com）小编为大家整理，主要介绍了 Node.js 爬虫的相关知识，希望对你有一定的参考价值。
// Scrape a static page: fetch the HTML, extract every product in the list,
// download its picture and insert one row per product into MySQL.
const fs = require('fs');
const path = require('path');
const { URL } = require('url');
const request = require('request');
const cheerio = require('cheerio');
const mysql = require('mysql');

const PAGE_URL = 'https://www.epet.com/cleargoodsmdog.html';
const IMAGE_DIR = path.join(__dirname, 'downloadimg');

const conn = mysql.createConnection({
  host: 'localhost',
  user: 'root',
  password: 'root',
  port: '3306',
  database: 'xiaomi',
});

/**
 * Download one product picture into IMAGE_DIR.
 *
 * @param {string} src - Value of the image's `src0` attribute; resolved
 *   against PAGE_URL so relative and protocol-relative URLs also work.
 */
function downloadImage(src) {
  let imageUrl;
  try {
    imageUrl = new URL(src, PAGE_URL);
  } catch (urlErr) {
    console.error('Skipping invalid image URL:', src);
    return;
  }

  // Use only the path component for the file name so a query string
  // never ends up in the name written to disk.
  const fileName = path.basename(imageUrl.pathname);
  if (!fileName) {
    console.error('Skipping image without a file name:', src);
    return;
  }

  request(imageUrl.href)
    .on('error', function (downloadErr) {
      console.error('Image download failed:', imageUrl.href, downloadErr.message);
    })
    .pipe(fs.createWriteStream(path.join(IMAGE_DIR, fileName)))
    .on('error', function (writeErr) {
      console.error('Image write failed:', fileName, writeErr.message);
    });
}

request(PAGE_URL, function (err, res, body) {
  // Without a body there is nothing to parse; bail out before cheerio throws.
  if (err || !body) {
    console.error('Failed to fetch page:', err ? err.message : 'empty response body');
    return;
  }

  const $ = cheerio.load(body);
  const list = $('.qcGoodsBox.bgwhite .fl.rela');
  console.log(list);

  // The write streams below need the target directory to exist.
  if (!fs.existsSync(IMAGE_DIR)) {
    fs.mkdirSync(IMAGE_DIR);
  }

  list.each(function () {
    const item = $(this);
    const pic = item.find('.cloud-zoom img').attr('src0');
    const title = item.find('.qcGoodsTit a').text();
    const price = item.find('.qcPriceBox .ft20').text();
    const yprice = item.find('.qcPriceBox .ft12').text();

    // The original download call sat outside the loop and referenced an
    // undefined `imgsrc`; the picture URL is only known per item.
    if (pic) {
      downloadImage(pic);
    }

    // NOTE(review): `yprice` is stored in the `goodsclass` column, exactly as
    // in the original - confirm this mapping against the table schema.
    conn.query(
      'insert into goods(goodsname,price,pic,goodsclass) values(?,?,?,?)',
      [title, price, pic, yprice],
      function (queryErr, results) {
        if (queryErr) {
          console.error('Insert failed:', queryErr.message);
          return;
        }
        console.log(results);
      }
    );
  });

  conn.end(function (endErr) {
    if (endErr) {
      console.error('Closing the MySQL connection failed:', endErr.message);
    }
  });
});
// Scrape dynamically rendered data: PhantomJS loads the page,
// then cheerio parses the resulting HTML and each item title is printed.
(async function () {
  // Required inside the function so these bindings cannot clash with
  // same-named top-level declarations elsewhere in the file.
  const phantom = require('phantom');
  const cheerio = require('cheerio');

  const pageUrl = 'http://you.163.com/item/list?categoryId=1065000&subCategoryId=1065001';
  const instance = await phantom.create();

  try {
    const page = await instance.createPage();
    await page.on('onResourceRequested', function (requestData) {
      console.info('Requesting', requestData.url);
    });

    const status = await page.open(pageUrl);
    if (status !== 'success') {
      throw new Error('Failed to open ' + pageUrl + ' (status: ' + status + ')');
    }

    const content = await page.property('content');
    const $ = cheerio.load(content);
    const list = $('.m-itemList.m-itemList-level2Category .item');

    list.each(function () {
      const title = $(this).find('.name a span:nth-of-type(3)').text();
      console.log(title);
    });
  } finally {
    // Always shut the Phantom process down, even when a step above throws.
    await instance.exit();
  }
})().catch(function (err) {
  console.error('Scrape failed:', err);
  process.exitCode = 1;
});
以上是关于 Node.js 爬虫的主要内容。如果未能解决你的问题，请参考以下文章：