javascript crawl.js:用于构建通过 web-crawljs 爬取网页的 CLI

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了 javascript crawl.js(用于构建通过 web-crawljs 爬取网页的 CLI)的相关知识,希望对你有一定的参考价值。

#!/usr/bin/env node
/**
 * Created by kayslay on 5/31/17.
 */
const crawler = require('web-crawljs');
const program = require('commander');

//commander configuration
/**
 * Parses a comma-separated command-line value into a list of items.
 *
 * Whitespace around each item is removed and empty items (produced by
 * doubled or trailing commas) are dropped, so values such as
 * "http://a.com, http://b.com," yield clean entries.
 *
 * @param {string} val - the raw comma-separated value
 * @returns {string[]} the trimmed, non-empty items in their original order
 */
function list(val) {
    "use strict";
    return val
        .split(',')
        .map((item) => item.trim())
        .filter((item) => item.length > 0);
}

// Declare the supported command-line options and parse the process arguments.
//   -x / --execute : the configuration to execute (checked as mandatory below)
//   -d / --depth   : the depth of the crawl
//   -u / --urls    : comma-separated urls, turned into an array by `list`
program
    .option('-x --execute <string>', 'the configuration to execute')
    .option('-d --depth [number]', 'the depth of the crawl')
    .option('-u --urls [items]', 'change the urls', list)
    .parse(process.argv);

// Abort early when no configuration was selected: the execute flag is mandatory.
if (!program.execute) {
    const missingConfigMessage = [
        'the configuration to use must be set use the -x flag to define configuration;',
        ' use the --help for help',
    ].join('');
    throw new Error(missingConfigMessage);
}
// Overrides collected from the command line; they are merged over the
// crawlConfig loaded from the selected configuration file.
const additionalConfig = {};

// depth: command-line values arrive as strings (or as `true` when the flag is
// given without a value), so convert to an integer and reject anything that is
// not numeric instead of handing a non-numeric depth to the crawler.
if (program.depth) {
    const depth = Number.parseInt(program.depth, 10);
    if (Number.isNaN(depth)) {
        throw new Error(`the depth must be an integer, got "${program.depth}"`);
    }
    additionalConfig.depth = depth;
}

// urls: forwarded as produced by the option parser.
if (program.urls) {
    additionalConfig.urls = program.urls;
}

// the action is the file name (inside ./config) that holds the crawlConfig
const action = program.execute;

try {
    // Build the crawlConfig from the configuration file plus the command-line
    // overrides. Merging into a fresh object keeps the module object cached by
    // require() untouched.
    const crawlConfig = Object.assign({}, require(`./config/${action}`), additionalConfig);
    const Crawler = crawler(crawlConfig);
    Crawler.CrawlAllUrl();
} catch (err) {
    console.error(`An Error occurred: ${err.message}`);
    // Report the failure to the calling shell instead of exiting with status 0.
    process.exitCode = 1;
}

以上是关于 javascript crawl.js(用于构建通过 web-crawljs 爬取网页的 CLI)的主要内容,如果未能解决你的问题,请参考以下文章:

为什么我的代码无法用 mex 构建,却可以用 cl 构建?

将 javax.smartcardio 用于 MIFARE Classic 和 Omnikey 5021 CL

LLVM 在 Windows 上构建:跳过 Visual Studio cl.exe [重复]

React 用于构建用户界面的 JavaScript 库

用于在非 SPA 站点上构建可重用组件的 Javascript MVC 框架

在 @vue/cli 构建的项目下,通过 postcss.config.js 配置将 px 转化成 rem