phantomjs搭建輸出js渲染後的html接口
阿新 • • 發佈:2019-02-11
mage 輸出 itl asc cin rep 模式 none settings
接觸過爬蟲的人都知道,很多頁面的數據要等js渲染之後才會出現在html裏,才能拿來解析。php執行JavaScript可以使用V8 javascript engine;要想獲取渲染後的html,也可以直接使用
php的擴展 querylist 來采集,文檔:
https://www.querylist.cc/docs/guide/v4/PhantomJS
不過這裏想聊聊phantomjs的api接口文檔,以前只會命令行模式的phantomjs指令 + php的exec函數執行JavaScript代碼
phantomjs的api:
http://phantomjs.org/api/
接口功能:提供一個接口地址,傳入網址返回渲染後的html(字符串)
使用webserver模式:phantomjs可執行文件執行js文件,就會監聽端口,當有請求進來之後就會返回渲染後的html
/**
 * PhantomJS script: a small HTTP service that returns JavaScript-rendered HTML.
 *
 * Run with `phantomjs <this-file>.js`; the service then listens on LISTEN_PORT.
 * Requests are form-encoded POSTs with these fields:
 *
 *   action      'openUrl' (default) or 'parseJs'
 *   url         [openUrl] address of the page to render
 *   postData    [openUrl] optional request body; when non-empty the page is
 *               loaded with POST instead of GET
 *   loadSource  [openUrl] 'no' disables page JavaScript and image loading
 *   code        [parseJs] script text (typically an `eval(function(p,a,c,k,e,d)...)`
 *               packed script) to evaluate in a blank page; the resulting value
 *               is returned as text
 *
 * PhantomJS runs an ES5-era engine, so this file deliberately sticks to `var`
 * and function expressions (no let/const, arrow functions or template strings).
 */

var LISTEN_PORT = 8888;

// Whitelist of supported values for the `action` field.
var action = { openUrl: true, parseJs: true };

/**
 * Returns the parsed form fields of a request, or an empty object when the
 * request carries no parsed fields (no body, or a raw string body).
 */
function readPostFields(request) {
  var post = request.post;
  return (post !== null && typeof post === 'object') ? post : {};
}

/**
 * Picks the action to run. Anything that is not an own key of `action`
 * (including inherited names such as 'constructor') falls back to 'openUrl',
 * so every request is routed to a handler that closes the response.
 */
function resolveAction(post) {
  var requested = post.action;
  if (typeof requested === 'string' &&
      Object.prototype.hasOwnProperty.call(action, requested)) {
    return requested;
  }
  return 'openUrl';
}

/** Extracts the openUrl options from the posted fields, applying defaults. */
function buildPageInfo(post) {
  return {
    url: (typeof post.url !== 'undefined') ? post.url : '',
    postData: (typeof post.postData !== 'undefined') ? post.postData : '',
    loadSource: post.loadSource !== 'no'
  };
}

/** Removes a leading `eval` so the remaining expression yields the unpacked text. */
function stripEvalPrefix(code) {
  return String(code).replace(/^\s*eval/, '');
}

/** Writes a complete UTF-8 response and closes it. */
function sendBody(response, statusCode, contentType, body) {
  response.statusCode = statusCode;
  // Declare the charset explicitly so non-ASCII bodies are decoded correctly.
  response.setHeader('Content-Type', contentType + '; charset=utf-8');
  response.write(body);
  response.close();
}

/**
 * Loads `post.url` in `page` and replies with the rendered HTML.
 * Replies 200 on a successful load and 502 when the load fails.
 */
function handleOpenUrl(page, post, response) {
  // Per-request state: the load callback runs later, so it must not read
  // anything another request could overwrite in the meantime.
  var pageInfo = buildPageInfo(post);

  if (!pageInfo.url) {
    page.close();
    // Status stays 200 for compatibility with callers that only read the body.
    sendBody(response, 200, 'text/html', '請傳URL');
    return;
  }

  if (pageInfo.loadSource === false) {
    page.settings.javascriptEnabled = false;
    page.settings.loadImages = false;
  }

  function onLoaded(status) {
    var body = page.content;
    console.log('url:' + pageInfo.url);
    console.log('Status: ' + status);
    page.close(); // release the page once its content has been captured
    sendBody(response, status === 'success' ? 200 : 502, 'text/html', body);
  }

  if (pageInfo.postData) {
    page.open(pageInfo.url, 'post', pageInfo.postData, onLoaded);
  } else {
    page.open(pageInfo.url, onLoaded);
  }
}

/**
 * Evaluates `post.code` inside a blank page and replies with the result as
 * plain text. Replies 400 when `code` is missing or fails to evaluate.
 *
 * SECURITY: this executes caller-supplied JavaScript. It runs through
 * page.evaluate (the page context), not in the PhantomJS controller context,
 * but the service should still only be reachable by trusted callers.
 */
function handleParseJs(page, post, response) {
  var packed = (typeof post.code === 'string') ? post.code : '';

  if (!packed) {
    page.close();
    sendBody(response, 400, 'text/plain', 'Missing "code" parameter');
    return;
  }

  var expression = stripEvalPrefix(packed);

  // An empty URL is used only to obtain a blank page to evaluate in.
  // NOTE(review): relies on PhantomJS invoking the callback for an empty URL,
  // as the previous version of this script did - confirm on the target version.
  page.open('', function () {
    var outcome = page.evaluate(function (source) {
      try {
        return { ok: true, value: String(eval(source)) };
      } catch (err) {
        return { ok: false, value: String(err) };
      }
    }, expression);

    page.close();

    if (outcome && outcome.ok) {
      sendBody(response, 200, 'text/plain', outcome.value);
    } else {
      sendBody(response, 400, 'text/plain',
        'Could not evaluate code: ' + (outcome ? outcome.value : 'no result'));
    }
  });
}

/** Request entry point: creates a fresh page and dispatches on `action`. */
function handleRequest(request, response) {
  var post = readPostFields(request);
  var page = require('webpage').create();

  if (resolveAction(post) === 'parseJs') {
    handleParseJs(page, post, response);
  } else {
    handleOpenUrl(page, post, response);
  }
}

/** Starts the HTTP service; exits PhantomJS if the port cannot be bound. */
function startServer(port) {
  var server = require('webserver').create();
  var service = server.listen(port, handleRequest);

  if (!service) {
    console.log('Could not listen on port ' + port + ' (is it already in use?)');
    phantom.exit(1);
  }
  return server;
}

// The `webserver`/`webpage` modules only exist inside PhantomJS.
if (typeof phantom !== 'undefined') {
  startServer(LISTEN_PORT);
} else {
  console.log('This script must be run with PhantomJS.');
}
註意:監聽的端口不能已經被其他程序占用。
phantomjs搭建輸出js渲染後的html接口