Node.js抓取新浪新闻标题
Posted by sheryee
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Node.js抓取新浪新闻标题相关的知识,希望对你有一定的参考价值。
"use strict";

// Fetches the Sina world-news index page and prints every headline link found
// under `.content` as "title|url", one per line.

const cheerio = require("cheerio");
const http = require("http");
const iconv = require("iconv-lite");

const mainUrl = "http://news.sina.com.cn/world/";

/**
 * Parses an HTML document and logs "title|url" for each anchor inside
 * `.content` that looks like a headline link.
 *
 * An anchor is printed only when its trimmed text is longer than 4
 * characters, its trimmed href is non-empty, and the href does not contain
 * the substring "javascript" (filters out `javascript:` pseudo-links).
 *
 * @param {string} html - Decoded HTML source of the page.
 * @returns {void}
 */
function printHeadlines(html) {
  const $ = cheerio.load(html, { decodeEntities: false });

  $(".content a").each((idx, element) => {
    const link = $(element);
    // text()/attr() may yield an empty or undefined value; normalize to "".
    const title = (link.text() || "").trim();
    const url = (link.attr("href") || "").trim();

    if (title.length > 4 && url.length > 0 && !url.includes("javascript")) {
      console.log(`${title}|${url}`);
    }
  });
}

http
  .get(mainUrl, (sres) => {
    const { statusCode } = sres;

    // Anything outside 2xx (redirects, server errors) is not the page we want.
    if (statusCode < 200 || statusCode >= 300) {
      console.error(`Request to ${mainUrl} failed with status ${statusCode}`);
      // Drain the response so the underlying socket is released.
      sres.resume();
      return;
    }

    // Collect raw Buffers and decode once at the end, so multi-byte
    // characters split across chunk boundaries are decoded correctly.
    const chunks = [];
    sres.on("data", (chunk) => {
      chunks.push(chunk);
    });

    sres.on("end", () => {
      const html = iconv.decode(Buffer.concat(chunks), "utf8");
      printHeadlines(html);
    });

    sres.on("error", (err) => {
      console.error(`Error while reading response from ${mainUrl}: ${err.message}`);
    });
  })
  .on("error", (err) => {
    // Without this listener a DNS/connection failure throws an uncaught exception.
    console.error(`Request to ${mainUrl} failed: ${err.message}`);
  });
以上是关于Node.js抓取新浪新闻标题的主要内容,如果未能解决你的问题,请参考以下文章:
python爬虫:使用urllib.request和BeautifulSoup抓取新浪新闻标题链接和主要内容