1. 程式人生 > >phantomjs 開發爬蟲框架

phantomjs 開發爬蟲框架

sel true count his require c118 nconf tel started

函數

  • page.childFramesCount
  • page.childFramesName
  • page.close
  • page.currentFrameName
  • page.deleteLater
  • page.destroyed
  • page.evaluate
  • page.initialized
  • page.injectJs
  • page.javaScriptAlertSent
  • page.javaScriptConsoleMessageSent
  • page.loadFinished
  • page.loadStarted
  • page.openUrl
  • page.release
  • page.render
  • page.resourceError
  • page.resourceReceived
  • page.resourceRequested
  • page.uploadFile
  • page.sendEvent
  • page.setContent
  • page.switchToChildFrame
  • page.switchToMainFrame
  • page.switchToParentFrame
  • page.addCookie
  • page.deleteCookie
  • page.clearCookies

回調處理程序

以下列出頁面的所有事件回調:

  • onInitialized
  • onLoadStarted
  • onLoadFinished
  • onUrlChanged
  • onNavigationRequested
  • onRepaintRequested
  • onResourceRequested
  • onResourceReceived
  • onResourceError
  • onResourceTimeout
  • onAlert
  • onConsoleMessage
  • onClosing
// Every callback below does the same two things: print its own name, then
// hand the arguments it received to printArgs (keeping the original `this`).
// The factory builds one such handler per callback name.
var makeEventLogger = function (label) {
    return function () {
        console.log(label);
        printArgs.apply(this, arguments);
    };
};

// Page lifecycle / navigation callbacks.
page.onInitialized = makeEventLogger("page.onInitialized");
page.onLoadStarted = makeEventLogger("page.onLoadStarted");
page.onLoadFinished = makeEventLogger("page.onLoadFinished");
page.onUrlChanged = makeEventLogger("page.onUrlChanged");
page.onNavigationRequested = makeEventLogger("page.onNavigationRequested");
page.onRepaintRequested = makeEventLogger("page.onRepaintRequested");

// Resource callbacks are only installed when logResources is strictly true.
if (logResources === true) {
    page.onResourceRequested = makeEventLogger("page.onResourceRequested");
    page.onResourceReceived = makeEventLogger("page.onResourceReceived");
}

page.onClosing = makeEventLogger("page.onClosing");

// Page-side trigger: window.console.log(msg);
page.onConsoleMessage = makeEventLogger("page.onConsoleMessage");

// Page-side trigger: window.alert(msg);
page.onAlert = makeEventLogger("page.onAlert");

// Page-side trigger: var confirmed = window.confirm(msg);
page.onConfirm = makeEventLogger("page.onConfirm");

// Page-side trigger: var user_value = window.prompt(msg, default_value);
page.onPrompt = makeEventLogger("page.onPrompt");
////////////////////////////////////////////////////////////////////////////////
// Scripted walkthrough: five timers scheduled 5000 ms apart (0, 5000, 10000,
// 15000, 20000). Each one prints a "### STEP n" banner and then performs one
// action on `page`. `page`, `step1url` and `step2url` come from the
// surrounding script.

// STEP 1: open the first URL.
setTimeout(function() {
    console.log("");
    console.log("### STEP 1: Load ‘" + step1url + "‘");
    page.open(step1url);
}, 0);

// STEP 2: open the second URL (per the banner: same URL plus a fragment).
setTimeout(function() {
    console.log("");
    console.log("### STEP 2: Load ‘" + step2url + "‘ (load same URL plus FRAGMENT)");
    page.open(step2url);
}, 5000);

// STEP 3: dispatch a synthetic click on an in-page (fragment) link.
setTimeout(function() {
    console.log("");
    console.log("### STEP 3: Click on page internal link (aka FRAGMENT)");
    page.evaluate(function() {
        // Runs inside the page: build a bubbling, cancelable click event.
        var ev = document.createEvent("MouseEvents");
        ev.initEvent("click", true, true);
        // The attribute value must be wrapped in ASCII quotes; the mangled
        // typographic quotes made this selector invalid.
        document.querySelector("a[href='#Event_object']").dispatchEvent(ev);
    });
}, 10000);

// STEP 4: dispatch a synthetic click on a link that leaves the page.
setTimeout(function() {
    console.log("");
    console.log("### STEP 4: Click on page external link");
    page.evaluate(function() {
        var ev = document.createEvent("MouseEvents");
        ev.initEvent("click", true, true);
        // ASCII quotes again: with typographic quotes the selector looked for
        // a title that literally contained those quote characters.
        document.querySelector("a[title='JavaScript']").dispatchEvent(ev);
    });
}, 15000);

// STEP 5: close the page, then exit 100 ms later.
setTimeout(function() {
    console.log("");
    console.log("### STEP 5: Close page and shutdown (with a delay)");
    page.close();
    setTimeout(function(){
        phantom.exit();
    }, 100);
}, 20000);

網絡監控

// Network monitoring: log every resource request and every response of a
// page as pretty-printed JSON (4-space indent).
// NOTE(review): `url` is not defined in this snippet; the surrounding script
// must provide it before page.open(url) runs.
var page = require('webpage').create();

// Called with the request description; prints it prefixed with "Request ".
page.onResourceRequested = function(request) {
  console.log('Request ' + JSON.stringify(request, undefined, 4));
};

// Called with the response description; prints it prefixed with "Receive ".
page.onResourceReceived = function(response) {
  console.log('Receive ' + JSON.stringify(response, undefined, 4));
};

page.open(url);

phantomjs 開發爬蟲框架