phantomjs 开发爬虫框架
Posted 肥肥鱼与鱼
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了phantomjs 开发爬虫框架相关的知识,希望对你有一定的参考价值。
函数
- page.childFramesCount
- page.childFramesName
- page.close
- page.currentFrameName
- page.deleteLater
- page.destroyed
- page.evaluate
- page.initialized
- page.injectJs
- page.javaScriptAlertSent
- page.javaScriptConsoleMessageSent
- page.loadFinished
- page.loadStarted
- page.openUrl
- page.release
- page.render
- page.resourceError
- page.resourceReceived
- page.resourceRequested
- page.uploadFile
- page.sendEvent
- page.setContent
- page.switchToChildFrame
- page.switchToMainFrame
- page.switchToParentFrame
- page.addCookie
- page.deleteCookie
- page.clearCookies
回调处理程序
页面的所有事件回调列表:
- onInitialized
- onLoadStarted
- onLoadFinished
- onUrlChanged
- onNavigationRequested
- onRepaintRequested
- onResourceRequested
- onResourceReceived
- onResourceError
- onResourceTimeout
- onAlert
- onConsoleMessage
- onClosing
// Page event handlers.
//
// Each handler logs its own name and then forwards whatever arguments
// PhantomJS passed to it on to printArgs. `page`, `printArgs` and
// `logResources` are not defined in this excerpt; they must be set up
// earlier in the script (printArgs presumably prints each argument; confirm
// against its definition).
//
// Function expressions are used (not arrow functions) because the handlers
// rely on `arguments` and forward `this`.

page.onInitialized = function() {
    console.log("page.onInitialized");
    printArgs.apply(this, arguments);
};
page.onLoadStarted = function() {
    console.log("page.onLoadStarted");
    printArgs.apply(this, arguments);
};
page.onLoadFinished = function() {
    console.log("page.onLoadFinished");
    printArgs.apply(this, arguments);
};
page.onUrlChanged = function() {
    console.log("page.onUrlChanged");
    printArgs.apply(this, arguments);
};
page.onNavigationRequested = function() {
    console.log("page.onNavigationRequested");
    printArgs.apply(this, arguments);
};
page.onRepaintRequested = function() {
    console.log("page.onRepaintRequested");
    printArgs.apply(this, arguments);
};

// Resource-level logging is opt-in: these two handlers are only installed
// when logResources is strictly `true`.
if (logResources === true) {
    page.onResourceRequested = function() {
        console.log("page.onResourceRequested");
        printArgs.apply(this, arguments);
    };
    page.onResourceReceived = function() {
        console.log("page.onResourceReceived");
        printArgs.apply(this, arguments);
    };
}

page.onClosing = function() {
    console.log("page.onClosing");
    printArgs.apply(this, arguments);
};

// In-page trigger: window.console.log(msg);
page.onConsoleMessage = function() {
    console.log("page.onConsoleMessage");
    printArgs.apply(this, arguments);
};

// In-page trigger: window.alert(msg);
page.onAlert = function() {
    console.log("page.onAlert");
    printArgs.apply(this, arguments);
};

// In-page trigger: var confirmed = window.confirm(msg);
// NOTE(review): this handler returns nothing; check the PhantomJS docs for
// what the in-page confirm() call then evaluates to.
page.onConfirm = function() {
    console.log("page.onConfirm");
    printArgs.apply(this, arguments);
};

// In-page trigger: var user_value = window.prompt(msg, default_value);
// NOTE(review): this handler returns nothing; check the PhantomJS docs for
// what the in-page prompt() call then evaluates to.
page.onPrompt = function() {
    console.log("page.onPrompt");
    printArgs.apply(this, arguments);
};
////////////////////////////////////////////////////////////////////////////////

// Scripted walkthrough: five steps scheduled at 0 s, 5 s, 10 s, 15 s and 20 s
// so the events produced by each step can be observed before the next one
// starts. `page`, `phantom`, `step1url` and `step2url` are not defined in
// this excerpt and must be provided earlier in the script.

// STEP 1: initial page load.
setTimeout(function() {
    console.log("");
    console.log("### STEP 1: Load '" + step1url + "'");
    page.open(step1url);
}, 0);

// STEP 2: load the second URL (per the log message: same URL plus a fragment).
setTimeout(function() {
    console.log("");
    console.log("### STEP 2: Load '" + step2url + "' (load same URL plus FRAGMENT)");
    page.open(step2url);
}, 5000);

// STEP 3: dispatch a synthetic click on an in-page (fragment) link.
// The function passed to page.evaluate runs inside the loaded page, which is
// why it uses `document` directly.
setTimeout(function() {
    console.log("");
    console.log("### STEP 3: Click on page internal link (aka FRAGMENT)");
    page.evaluate(function() {
        var ev = document.createEvent("MouseEvents");
        ev.initEvent("click", true, true);
        // Assumes the loaded page contains this anchor; querySelector
        // returns null otherwise and dispatchEvent would throw.
        document.querySelector("a[href='#Event_object']").dispatchEvent(ev);
    });
}, 10000);

// STEP 4: dispatch a synthetic click on a link that leaves the page.
setTimeout(function() {
    console.log("");
    console.log("### STEP 4: Click on page external link");
    page.evaluate(function() {
        var ev = document.createEvent("MouseEvents");
        ev.initEvent("click", true, true);
        // Assumes the loaded page contains an anchor with this title.
        document.querySelector("a[title='JavaScript']").dispatchEvent(ev);
    });
}, 15000);

// STEP 5: close the page, then exit PhantomJS after a further 100 ms delay
// rather than immediately.
setTimeout(function() {
    console.log("");
    console.log("### STEP 5: Close page and shutdown (with a delay)");
    page.close();
    setTimeout(function() {
        phantom.exit();
    }, 100);
}, 20000);
网络监控
// Network monitoring: dump every outgoing request and every received
// response of a page load as pretty-printed JSON (4-space indent).
var page = require('webpage').create();

// Logs each resource request object.
page.onResourceRequested = function(request) {
    console.log('Request ' + JSON.stringify(request, undefined, 4));
};

// Logs each resource response object.
page.onResourceReceived = function(response) {
    console.log('Receive ' + JSON.stringify(response, undefined, 4));
};

// NOTE: `url` is not defined in this snippet; set it to the address to
// inspect before running.
page.open(url);
以上是关于 PhantomJS 开发爬虫框架的主要内容,如果未能解决你的问题,请参考以下文章:
[Python3网络爬虫开发实战] 1.2.5-PhantomJS的安装
Python爬虫(二十一)_Selenium与PhantomJS