使用 PhantomJs 登录后如何获取下一页?

Posted

技术标签:

【中文标题】使用 PhantomJS 登录后如何获取下一页?【英文标题】:How to get the next page after login with PhantomJS? 【发布时间】:2017-04-14 07:53:31 【问题描述】:

我在这里找到了很多关于这个的问题,但不知道为什么没有回答。

我在使用以下代码登录后尝试抓取网页:source

// Shared state for the step runner.
var steps=[];          // ordered list of step functions; assigned further down
var testindex = 0;     // index of the next step to execute
var loadInProgress = false;//This is set to true when a page is still loading

/*********SETTINGS*********************/
var webPage = require('webpage');
var page = webPage.create();
// Present a desktop Chrome user agent to the target site.
page.settings.userAgent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36';
page.settings.javascriptEnabled = true;
page.settings.loadImages = false;//Script is much faster with this field set to false
phantom.cookiesEnabled = true;
// NOTE(review): javascriptEnabled is normally a page setting; confirm that
// assigning it on `phantom` has any effect.
phantom.javascriptEnabled = true;
/*********SETTINGS END*****************/

console.log('All settings loaded, start with execution');
// Forward every console message emitted inside the page to the PhantomJS console.
page.onConsoleMessage = function(msg) {
    console.log(msg);
};
/**********DEFINE STEPS THAT PHANTOM SHOULD DO***********************/
// Each entry is a zero-argument function; the runner calls them in order.
steps = [

    //Step 1 - Open the login page
    function() {
        console.log('Step 1 - Abrindo página de login');
        page.open("http://parceriascury.housecrm.com.br", function(status) {
            // Intentionally empty: the load listeners track when the page is ready.
        });
    },
    //Step 3 - Populate and submit the login form
    function() {
        console.log('Step 3 - Preenchendo o form');
        // Runs inside the page context: fill both credential fields, then submit.
        page.evaluate(function() {
            document.getElementById("login").value="xxxxx";
            document.getElementById("senha").value="xxxxx";
            document.getElementById("frmlandingpage").submit();
        });
    },
    //Step 4 - Save the HTML of whatever page is currently loaded to
    //C:\phantomjs\logado_cury_10.html so it can be opened in a browser to
    //check whether the login succeeded.
    function() {
        console.log("Step 4 - Wait Amazon to login user. After user is successfully logged in, user is redirected to home page. Content of the home page is saved to AmazonLoggedIn.html. You can find this file where phantomjs.exe file is. You can open this file using Chrome to ensure that you are logged in.");
        var fs = require('fs');
        // Serialise the whole document as it exists at this moment.
        var result = page.evaluate(function() {
            return document.documentElement.outerHTML;
        });
        fs.write('C:\\phantomjs\\logado_cury_10.html',result,'w');
    },
];
/**********END STEPS THAT PHANTOM SHOULD DO***********************/

//Execute steps one by one
// Declared with `var` so the timer handle is not an implicit global.
var interval = setInterval(executeRequestsStepByStep,5000);

/**
 * Timer tick for the step runner.
 *
 * Runs the step at `testindex` and advances the index, but only when no page
 * load is in progress. Afterwards, if there is no function at the (possibly
 * advanced) index, logs completion and asks PhantomJS to exit.
 */
function executeRequestsStepByStep() {
    if (loadInProgress === false && typeof steps[testindex] === "function") {
        //console.log("step " + (testindex + 1));
        steps[testindex]();
        testindex++;
    }
    // Checked after the increment, so the script exits in the same tick that
    // ran the last step.
    if (typeof steps[testindex] !== "function") {
        console.log("test complete!");
        phantom.exit();
    }
}

/**
 * These listeners maintain the loadInProgress flag, which the step runner
 * checks to decide whether a page is still loading. Without them, page
 * content could be read before the page has finished loading.
 */
page.onLoadStarted = function() {
    loadInProgress = true;
    console.log('Loading started');
};
page.onLoadFinished = function() {
    loadInProgress = false;
    console.log('Loading finished');
};
// Forward console output from the page context to the PhantomJS console.
page.onConsoleMessage = function(msg) {
    console.log(msg);
};

但只有这样的回应:

<html><head></head><body>ok</body></html>

我需要用 URL 获取下一页的内容:

http://parceriascury.housecrm.com.br/parceiro_busca

我可以直接访问这个页面,但得到的内容并不完整,因为它需要先登录。

没有错误,我不知道我在哪里犯了错误。

编辑 欢迎其他解决方案,我想也许curl...但是在js加载之后...

对不起,我的英语不好。

【问题讨论】:

看起来您拿到的是身份验证之后的中间页面,即一个只显示“ok”的空白页面。请添加定时器,或者监听选择器的变化,以确保等待足够长的时间让重定向完成。 是的,看起来确实如此,但是当我手动登录时,登录后会重定向到另一个页面,我并没有看到 ok:http://parceriascury.housecrm.com.br/parceiro_busca,你能给我一点提示吗?多谢关注... 可以添加 phantom.onError 回调和 page.onError 回调。 【参考方案1】:

这段代码可能会更好:

var loadInProgress = false;//This is set to true when a page is still loading

/*********SETTINGS*********************/
// Configure the page at creation time through a single options object.
var page = require('webpage').create({
    viewportSize: {width: 1600, height: 900},
    settings: {
        userAgent: 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36',
        // Real booleans instead of the strings 'true'/'false'.
        javascriptEnabled: true,
        loadImages: false
    }
});
var fs = require('fs');
/*********SETTINGS END*****************/
console.log('All settings loaded, start with execution');

/**
 * These listeners maintain the loadInProgress flag, which records whether a
 * page is still loading. Without them, page content could be read before the
 * page has finished loading.
 */
page.onLoadStarted = function() {
    loadInProgress = true;
    console.log('Loading started');
};
page.onLoadFinished = function() {
    loadInProgress = false;
    console.log('Loading finished');
};
// Forward console output from the page context to the PhantomJS console.
page.onConsoleMessage = function(msg) {
    console.log(msg);
};

//Log in to your account, then view the cookie you got, now you can use these cookies to login
   // the site will recognize you with your cookies.

//for freebitco.in auth
// NOTE(review): these sample cookies are scoped to 'freebitco.in', not to the
// host opened below; replace them with the cookies issued by your target site.
phantom.cookies = [{// an array of objects
  'name'     : 'btc_address',
  'value'    : '1AuMxR6sPtB2Z6TkahSnpmm1H4KpYPBKqe',
  'domain'   : 'freebitco.in',
  'path'     : '/',
  'httponly' : false,
  'secure'   : true,
  'expires'  : (new Date()).getTime() + (1000 * 60 * 60 * 43800) //5 years
}, {
  'name'     : 'password',
  'value'    : 'f574ca68a8650d1264d38da4b7687ca3bf631e6dfc59a98c89dd2564c7601f84',
  'domain'   : 'freebitco.in',
  'path'     : '/',
  'httponly' : false,
  'secure'   : true,
  'expires'  : (new Date()).getTime() + (1000 * 60 * 60 * 43800) //5 years
}];

//Execute steps one by one
// Open the target page directly, relying on the cookies set on `phantom` for
// authentication, then save its HTML after a short delay.
page.open("http://parceriascury.housecrm.com.br/parceiro_busca", function(status) {
    console.log('Step 1 has been completed - we are on the target page!');
    setTimeout(step2,5000);// Maybe we don't need to wait here, we can execute step2 immediately.

    // Serialise the current document, write it to disk, and end the script.
    function step2() {
        console.log("Step 2 - Content of the home page is saved to AmazonLoggedIn.html. You can find this file where phantomjs.exe file is. You can open this file using Chrome to ensure that you are logged in.");
        var result = page.evaluate(function() {
            return document.documentElement.outerHTML;
        });
        fs.write('C:\\phantomjs\\logado_cury_10.html',result,'w');
        phantom.exit();
    }
});

【讨论】:

你能解释一下为什么上面的代码比 OP 发布的更好吗? @Igor 感谢帮助,我已经按你说的全部改了,但代码的结果和之前一样……返回的仍然是 <html><head></head><body>ok</body></html>,我不知道还能怎么办…… PhantomJS 不适合做这件事吗? 非常感谢……这些 cookies 救了我的命,可惜不能 +2……再次感谢。 @MagicHat 代码还可以写得更短,因为 phantom.cookies 不同于 document.cookie——我们根本不需要中间页面。另外,可以使用 location 代替 window.location.href

以上是关于使用 PhantomJs 登录后如何获取下一页?的主要内容,如果未能解决你的问题,请参考以下文章

如何获取下一页的页面源

单击登录后如何重定向到下一页并检查用户凭据是不是正确

如何在 PhantomJS 中停止/中止/取消页面加载?

如何在我的应用程序中使用 Facebook 登录并在成功登录后移至下一页/视图控制器?

表单验证后如何将用户发送到下一页?

使用Spring Boot Starter安全性中的Custom登录页面成功登录,没有进入下一页