nodejs 根据正则爬取本地文件夹里面内容
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了nodejs 根据正则爬取本地文件夹里面内容相关的知识,希望对你有一定的参考价值。
遍历目录用了第三方的 ndir 组件。
没有用 Node.js 自带的 events 模块,而是继承了上一篇文章中实现的 Event 类。
'use strict';

const ndir = require('ndir');
const fs = require('fs');

/**
 * Minimal publish/subscribe helper: handlers are registered per event name
 * with `on` and invoked synchronously, in registration order, by `trigger`.
 */
class Event {
  constructor() {
    // Handler lists keyed by event name. A prototype-less object is used so
    // that names such as "constructor" cannot collide with inherited members.
    this.eventList = Object.create(null);
  }

  /**
   * Registers a handler for an event.
   * @param {string} key - Event name.
   * @param {Function} fn - Handler; called with the emitter as `this`.
   */
  on(key, fn) {
    if (!this.eventList[key]) {
      this.eventList[key] = [];
    }
    this.eventList[key].push(fn);
  }

  /**
   * Invokes every handler registered for `key` with the remaining arguments.
   * @param {string} key - Event name.
   * @param {...*} args - Values forwarded to each handler.
   * @returns {(false|undefined)} `false` when no handler is registered.
   */
  trigger(key, ...args) {
    const fns = this.eventList[key];
    if (!fns || fns.length === 0) {
      return false;
    }
    for (const fn of fns) {
      // Ignore entries that are not callable instead of throwing mid-dispatch.
      if (typeof fn === 'function') {
        fn.apply(this, args);
      }
    }
    return undefined;
  }
}

/**
 * Returns a regular expression that is safe to drive an `exec` loop with.
 * A pattern without the `g` flag never advances `lastIndex`, so looping on
 * it would spin forever; such patterns are copied with `g` added.
 * @param {RegExp} regs - Pattern supplied by the caller.
 * @returns {RegExp} `regs` itself when already global, otherwise a global copy.
 */
function toGlobalRegExp(regs) {
  return regs.global ? regs : new RegExp(regs.source, `${regs.flags}g`);
}

/**
 * Stores every match of `regs` in `text` on `target`, using the matched text
 * as both key and value (which de-duplicates repeated matches).
 * @param {RegExp} regs - Pattern to search with.
 * @param {string} text - Content to scan.
 * @param {Object} target - Object that receives the matches.
 */
function collectMatches(regs, text, target) {
  const pattern = toGlobalRegExp(regs);
  pattern.lastIndex = 0;
  let match = pattern.exec(text);
  while (match !== null) {
    target[match[0]] = match[0];
    if (match[0] === '') {
      // A zero-length match leaves lastIndex untouched; step past it manually.
      pattern.lastIndex += 1;
    }
    match = pattern.exec(text);
  }
}

/**
 * Walks a directory tree, scans every file for matches of a regular
 * expression and writes the distinct matches to a file as a JSON object.
 *
 * Events emitted on the instance:
 *   - 'read-file'  with 'finish' once the directory walk ends, or with
 *                  'err', the error and the failing path when the walk fails.
 *   - 'read-files' with 'finish' once every collected file was processed.
 */
class GetChinese extends Event {
  /**
   * Starts the crawl immediately.
   * @param {string} path - Root folder to crawl.
   * @param {string} saveOutPutPath - File that receives the JSON result.
   * @param {RegExp} regs - Pattern whose matches are collected.
   */
  constructor(path, saveOutPutPath, regs) {
    super();
    this.path = path;
    this.saveOutPutPath = saveOutPutPath;
    this.regs = regs;
    // Prototype-less so that any matched text is a valid own key.
    this.outPathObj = Object.create(null);
    this.files = [];
    this.init();
  }

  /** Wires the event handlers together and kicks off the directory walk. */
  init() {
    this.on('read-file', (state, err, errPath) => {
      if (state === 'err') {
        console.error('Directory walk failed:', errPath, err);
        return false;
      }
      if (state === 'finish') {
        this.readFils();
      }
      return undefined;
    });

    this.on('read-files', (state) => {
      if (state !== 'finish') {
        return;
      }
      fs.writeFile(this.saveOutPutPath, JSON.stringify(this.outPathObj), (err) => {
        if (err) {
          console.error(`Failed to write ${this.saveOutPutPath}:`, err);
        }
      });
    });

    this.readDir();
  }

  /**
   * Collects the path of every regular file below `this.path` into
   * `this.files`, then emits 'read-file'.
   */
  readDir() {
    ndir.walk(
      this.path,
      (dirpath, files) => {
        // Each entry is indexed as [path, stats].
        for (const info of files) {
          if (info[1].isFile()) {
            this.files.push(info[0]);
          }
        }
      },
      () => {
        this.trigger('read-file', 'finish');
      },
      (err, errPath) => {
        this.trigger('read-file', 'err', err, errPath);
      }
    );
  }

  /**
   * Reads `this.files` one after another starting at `index`, collecting the
   * matches of `this.regs` into `this.outPathObj`. Emits 'read-files' with
   * 'finish' after the last file. Unreadable files are reported and skipped.
   * @param {number} [index=0] - Position in `this.files` to continue from.
   * @returns {(false|undefined)} `false` when there is nothing left to read.
   */
  readFils(index = 0) {
    if (index >= this.files.length) {
      this.trigger('read-files', 'finish');
      return false;
    }

    const file = this.files[index];
    fs.readFile(file, 'utf8', (err, data) => {
      if (err) {
        // Best effort: one unreadable file must not abort the whole crawl.
        console.error(`Skipping unreadable file ${file}:`, err);
      } else {
        collectMatches(this.regs, data, this.outPathObj);
      }
      this.readFils(index + 1);
    });
    return undefined;
  }
}

// Arguments: folder to crawl, file that receives the collected matches, and
// the pattern to match.
// NOTE(review): each of the three parts of the pattern needs at least one
// character, so a match is always three or more characters long.
const getChinese = new GetChinese(
  'C:\\Users\\long\\Desktop\\123test',
  'C:\\Users\\long\\Desktop\\123test.txt',
  /[^>]+[\u4E00-\u9FA5]+[^<]+/g
);
<span>要的订单</span>
例子是获取 HTML 标签里面的内容,例如从上面的 <span> 片段中得到“要的订单”。
简单实现功能,还有些错误没有处理
以上是关于nodejs 根据正则爬取本地文件夹里面内容的主要内容,如果未能解决你的问题,请参考以下文章
通过正则表达式python爬取指定网页中的参数内容,保存到指定数据文件中