javascript 网站抓取器(website scraper)

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了使用 JavaScript 抓取网站(website scraper)的相关知识,希望对你有一定的参考价值。

/*

https://github.com/website-scraper/node-website-scraper
https://www.npmjs.com/package/website-scraper-2
https://scraper.nepochataya.pp.ua/

npm install website-scraper
*/

// Download http://rsworldpi.com/ into a local folder using the
// `website-scraper` package, then report the outcome on the console.
const scraper = require('website-scraper');

// Pages the download starts from.
const startUrls = ['http://rsworldpi.com/'];

// Mapping from file extension to the subdirectory name it is paired with.
// NOTE(review): this is combined with filenameGenerator 'bySiteStructure'
// below; confirm against the website-scraper docs whether `subdirectories`
// is honoured in that mode or only with the 'byType' generator.
const extensionFolders = [
  {directory: 'img', extensions: ['.jpg', '.png', '.svg']},
  {directory: 'js', extensions: ['.js']},
  {directory: 'css', extensions: ['.css']}
];

// Selector/attribute pairs naming the HTML references to pick up:
// image sources, stylesheet links and script sources.
const assetSources = [
  {selector: 'img', attr: 'src'},
  {selector: 'link[rel="stylesheet"]', attr: 'href'},
  {selector: 'script', attr: 'src'}
];

// Requests are sent with a mobile Chrome (Android / Nexus 4) User-Agent.
const requestOptions = {
  headers: {
    'User-Agent': 'Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 4 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19'
  }
};

// Full option object handed to the scraper.
const options = {
  urls: startUrls,
  recursive: true,
  maxRecursiveDepth: 1,
  filenameGenerator: 'bySiteStructure',
  directory: 'rsworldpi',
  subdirectories: extensionFolders,
  sources: assetSources,
  request: requestOptions
};

// Success handler: the resolved value is not inspected, only a marker is printed.
function onDone() {
  console.log('hello');
}

// Failure handler: prints the error, string-concatenated after a 'fail' prefix.
function onFail(err) {
  console.log('fail' + err);
}

scraper(options).then(onDone).catch(onFail);

以上是关于使用 JavaScript 抓取网站(website scraper)的主要内容,如果未能解决你的问题,请参考以下文章:

javascript 报告刮刀

javascript cheerio基本刮刀

JS 图像刮刀

使用python beautiful soup或html模块的电子邮件刮刀

Python BeautifulSoup 硒刮刀

text 网络刮刀