如何使用 Python 登录网站进行抓取
Posted
技术标签:
【中文标题】如何使用 Python 登录网站进行抓取 【英文标题】:How do I Login to A site using Python for scraping purposes 【发布时间】:2016-11-29 17:12:09 【问题描述】:我对 Python 和编程比较陌生。我尝试按照此处类似问题提供的步骤进行操作,但我的程序无法成功登录。我最近参考的代码来自 “How to scrape a website which requires login using python and beautifulsoup?”。以下是我尝试的代码,以及我得到的响应:
import mechanize
import BeautifulSoup
import urllib2
import cookielib
# Question's attempt (Python 2): open the sign-in page with mechanize, fill in
# the login form and print whatever the server sends back after submitting.

# Attach a cookie jar to the browser so cookies set by the server are kept
# for subsequent requests made through `br`.
cj = cookielib.CookieJar()
br = mechanize.Browser()
br.set_cookiejar(cj)
# Sign-in page; `returnto` is the URL-encoded address of sentdir.php.
br.open("http://www.bbnplace.com/accounts_v2/?do=signin&service=prepaidsms&returnto=http%3A%2F%2Fsms.bbnplace.com%2Fsentdir.php")
# Select the form whose name attribute is "blogin" and fill its two fields.
br.select_form('blogin')
br.form['busername'] = 'my_username'
br.form['passwd'] = 'my_password'
# NOTE(review): the page printed below is the sign-in page again. Its "blogin"
# form declares onsubmit="return false;" and no action, and its Login button
# calls processFrm('blogin','errormsg') from JavaScript, so the real login
# request is presumably issued by script rather than by a plain form
# submit -- confirm against the site's un.js.
response = br.submit()
# Prints the body of the browser's most recent response.
print br.response().read()
<!DOCTYPE html>
<html lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta http-equiv="keywords" content="bulksms, bulk sms, bulk sms gateway, cheap bulk sms, bulk sms provider, bulk sms to nigeria, send bulk sms, personalized bulk sms, bulk sms nigeria, best sms site in nigeria, nigerian bulks sms gateway, web to mobile sms" />
<meta http-equiv="description" content="Send bulk sms with personalized sender name to all GSM networks in Nigeria and over 800 networks in 160 countries. Nigeria's Best SMS Gateway." />
<title>BBN SMS Messenger: Retail Web to Mobile Bulk SMS Messaging Utility</title>
<link rel="stylesheet" href="http://www.bbnplace.com/accounts_v2/style/layout.css" type="text/css" media="screen" />
<script type="text/javascript" async src="http://www.bbnplace.com/accounts_v2/bbjs/un.js"></script>
<script type="text/javascript">var L;var C;var bw=0;var G;var eD;var am;var eE=0;var method;var url;var J;var be;var fp='';var eY='';var dW=0;var eH='dG';var eG;var action='sent';var bS=0;var eC='';var eW=new Number();var bP='';var ft='save';var host=document.domain=='localhost'?'localhost/smsmessenger/':'sms.bbnplace.com';var protocol='http';var fc;var eA=protocol+"://"+host+"/source/";var dR=0;var responseText;var response=new Array();var bs;var bV=new Number();var H;var request;var fl;var aB;var aQ;var aL=window.innerWidth;var ab=window.innerHeight;var ca=new Array();var aK=new Array();var cv=new Array();var bb=new Array();var dN=new Array();aB=document.getElementById('aD');var et=110;aQ=document.getElementById('bx');if(request=='gocheck')ad();function ak(v)var k=document.getElementById(v);k.style.visibility='hidden';k.style.height='0px';;function aE(v)var k=document.getElementById(v);k.style.visibility='inherit';k.style.height='auto';;function bJ(v,ba)var ids=new Array();var aw=new String();aw=ba;ids=aw.split(',');for(var i=0;i<ids.length;i++)ak(ids[i]);ids='';aE(v);return;;function ay(bF,cF,dw,bH,bi,aq,bN,aA)be=bi;C=bH;G=aq;H=aA;J=dw;method=bF;url=cF;am=bN;L=bO();if(L)if(method=='post')tryL.open(method,url,true);L.onreadystatechange=bg;L.setRequestHeader('Content-Type','application/x-www-form-urlencoded');L.setRequestHeader('Content-Length',J.length);L.setRequestHeader('Connection','close');L.send(J);catch(e)alert("Error connecting to server: "+e.toString());elsetryJ.length>1?aG=url+'?'+J:aG=url;L.open(method,aG,true);L.onreadystatechange=bg;L.send(null);catch(e)alert("Could not connect to server: "+e.toString());;function bg()var d;if(C.length>0)d=document.getElementById(C);if(L.readyState==4)if(L.status==200)tryvar response=dO();if(C.length>0)d.style.visibility='hidden';bw=0;catch(e)d.innerHTML="Error reading server response: "+e.toString();elseif(C.length>0)if(L.status)d.innerHTML='Server Response: '+L.status+' - 
'+L.statusText;d.style.visibility='visible';elsed.innerHTML='Connection to server failed. Retrying...';d.style.visibility='visible';bQ=setTimeout('bz()',5000);elseif(C.length>0)d.innerHTML='<img border="0" src="style/29.gif" align="absmiddle" /> <b>Loading...</b>';d.style.visibility='visible';ap=setTimeout('dK()',(be*1000));return response;;function dK()if(L.readyState!=4)clearTimeout(ap);document.getElementById(C).innerHTML='Connection is too slow. Retrying...';bQ=setTimeout('bD()',5000);;function bD()clearTimeout(bQ);if(L.readyState!=4)L.abort();ay(method,url,J,C,be,G,H);;function bz()clearTimeout(bQ);if(!L.status)L.abort();ay(method,url,J,C,be,G,H);;function df()x=document.getElementById(C);x.style.visibility='hidden';;function dO()var responseText,cp;var doctype=L.getResponseHeader('Content-Type').toString();var l=document.getElementById(G);if(H.length)var aJ=document.getElementById(H);l.innerHTML='';response['type']=doctype;if(doctype=='text/plain'||doctype=='text/html')responseText=L.responseText;if(responseText.substr(0,4)=='Err:')l.innerHTML=responseText.substr(5);l.style.visibility='inherit';l.style.height='auto';if(responseText.substr(5)=='')if(bS>0)l.innerHTML='<span style="color:red;">'+bP+'</span>';else if(responseText.substr(0,3)=='OK:')var aH=responseText.substr(4);switch(request)case 'newpost':document.forms[request].es.value=aH;bs.innerHTML='Saved';eu();break;elseif(H.length)aJ.innerHTML=responseText;else if(request=='gocheck')l.innerHTML=responseText;y=document.forms.di;if(responseText=='available')y.cf.value=1;y.ds.value=y.username.value;l.innerHTML='Available';l.style.fontWeight='bold';l.style.color='green';ad();elseif(responseText=='successful')switch(request)case 'newRequest':bJ('h','h,j');break;else if(responseText=='denied')bf=protocol+'://'+host+'/accounts';window.location=bf;elsealert(responseText);;function bO()tryL=new XMLHttpRequest();catch(e)var aI=new 
Array('MSXML2.XMLHTTP.6.0','MSXML2.XMLHTTP.5.0','MSXML2.XMLHTTP.4.0','MSXML2.XMLHTTP.3.0','MSXML2.XMLHTTP','Microsoft.XMLHTTP');for(var i=0;i<aI.length&& !L;i++)tryL=new ActiveXObject(aI[i]);catch(e)if(!L)alert('Please Upgrade your web browser');elsereturn L;;function aN()var x=document.forms['t'];request='newRequest';aq='V';var bp,ao,aW,az;bp=x.ag.value;ao=x.bI.value;aW=x.aR.value;dr=x.an.value;az=x.av.value;aZ='eml='+encodeURIComponent(ao)+'&f='+encodeURIComponent(bp)+'&p='+encodeURIComponent(aW)+'&m='+encodeURIComponent(az)+'&p2='+encodeURIComponent(dr);al=document.getElementById('V');al.style.visibility='hidden';al.style.height=0;dP='source/contact.php';ay('post',dP,aZ,'',30,'V','','');;function cu()dJ();;function cl()ce();;function bB(aX)var x=document.getElementById('bh');if(!x)var x=document.createElement('div');x.setAttribute('id','bh');x.style.backgroundColor='#000';x.style.top='0px';x.style.left='0px';x.style.position='fixed';x.style.zIndex=99999;x.style.opacity=0.5;x.style.width=aL+'px';x.style.height=ab+'px';x.style.visibility='visible';document.body.appendChild(x);if(aX==null)var T=document.getElementById('t');T.style.visibility='visible';T.style.height='auto';T.style.position='fixed';T.style.left=(aL/2-250)+'px';T.style.width='500px';T.style.zIndex=10000001;bJ('j','h,j');elsevar T=document.getElementById('ax');T.style.visibility='visible';bV=aX;;function ar()var bE=document.getElementById('bh');bE.style.width='0px';bE.style.height='0px';bE.style.visibility='hidden';if(bV==0)ak('h');ak('j');ak('t');document.forms['t'].reset();elsevar T=document.getElementById('ax');T.style.visibility='hidden';bV=0;;function cU(i)var aY=document.getElementById('bA');aY.innerHTML='<b>Message:</b><br />'+aK[i];aY.innerHTML+='<br /><br /><b>Broadcasted:</b> '+bb[i];document.getElementById('as').style.visibility='visible';ez(ca[i]);;function dA()document.getElementById('as').style.visibility='hidden';document.getElementById('ae').innerHTML='';</script>
</head>
<body><div id="fb-root"></div>
<script>(function(d, s, id)
var js, fjs = d.getElementsByTagName(s)[0];
if (d.getElementById(id)) return;
js = d.createElement(s); js.id = id;
js.src = "//connect.facebook.net/en_US/all.js#xfbml=1&appId=174240488183";
fjs.parentNode.insertBefore(js, fjs);
(document, 'script', 'facebook-jssdk'));</script><div id="ppbd"></div><div id="ppbdc"></div><div id="logoholder"></div>
<div id="app_header">
<div id="line1_left"><img src="http://www.bbnplace.com/accounts_v2/style/smlogo.png" align="absmiddle" title="BBN SMS Messenger: Retail Web to Mobile Bulk SMS Messaging Utility" /></div>
<div id="line1_right"><a href="#">Hi Buddie!</a> | <a href="http://sms.bbnplace.com">Messaging Solutions</a> | <a href="http://www.bbnplace.com">BBN</a> </div>
</div>
<div id="cK" align="right"> </div><div id="workarea" align="center">
<div id="frm_right">
<div id="note_panel"><div align="left">
<!-- <div style="margin: 0 0 40px; font-size: 22px; line-height: 27px;">
manage your contacts • send sms to <b style="color: red;">groups</b>
• from personal computer and <b style="color: red;">mobile</b>
</div> -->
<div style="font-size: 28px; line-height: 35px; margin: 35px 0 0;">
Send bulk sms • <b style="color: red;">confidently!</b>
</div>
<div id="bslogin_nav">
<img src="style/cc_icon.png" align="absmiddle" />
<b><a href="#how_to_recharge">Learn How to Recharge</a></b> <img
align="absmiddle" src="style/cog_icon.png" />
<b><a
href="http://www.bbnplace.com/documentation/?service=prepaidsms&article=networks&returnto="
target="_blank">See our Network Coverage</a></b>
</div>
<script type="text/javascript">function dJ()var x=document.forms['aN'];var V=new Number(x.df.value);var v=new Number(0);if(isNaN(V))document.getElementById("J").innerHTML="<span style='color: red;'>waiting...</span>";document.getElementById("hBp").innerHTML="<span style='color: red;'>waiting...</span>";document.getElementById("T").innerHTML="<span style='color: red;'>waiting...</span>";if(document.forms.F)document.forms.F.bJ.disabled=true;if(document.getElementById("statMsg"))document.getElementById("statMsg").innerHTML="<span style='color: red;'><strong>Please Enter a Numberic Value</strong></span>";if(V<50)document.getElementById("J").innerHTML="<span style='color: red;'>waiting...</span>";document.getElementById("hBp").innerHTML="<span style='color: red;'>waiting...</span>";document.getElementById("T").innerHTML="<span style='color: red;'>waiting...</span>";if(document.forms.F)document.forms.F.bJ.disabled=true;if(document.getElementById("statMsg"))document.getElementById("statMsg").innerHTML="<span style='color: red;'><strong>Please specify a minimum of 50</strong></span>";else if(V>=50&&V< 1000)document.getElementById("J").innerHTML="Teams & Groups";document.getElementById("hBp").innerHTML="NGN 3.50";v=V* 3.50;v=v.toFixed(2);document.getElementById("T").innerHTML='NGN '+v;if(document.forms.F)document.forms.F.bJ.disabled=false;if(document.getElementById("statMsg"))document.getElementById("statMsg").innerHTML='';if(document.forms.F)document.forms.F.J.value='Teams & Groups';document.forms.F.hBp.value= 3.50;document.forms.F.T.value=v;else if(V>= 1000 &&V< 10000)document.getElementById("J").innerHTML="Business Standard";document.getElementById("hBp").innerHTML="NGN 2.20";v=V* 2.20;v=v.toFixed(2);document.getElementById("T").innerHTML='NGN '+v;if(document.forms.F)document.forms.F.bJ.disabled=false;if(document.getElementById("statMsg"))document.getElementById("statMsg").innerHTML='';if(document.forms.F)document.forms.F.J.value='Business 
Standard';document.forms.F.hBp.value= 2.20;document.forms.F.T.value=v;else if(V>= 10000 &&V< 50000)document.getElementById("J").innerHTML="Business Professional";document.getElementById("hBp").innerHTML="NGN 2.00";v=V* 2.00;v=v.toFixed(2);document.getElementById("T").innerHTML='NGN '+v;if(document.forms.F)document.forms.F.bJ.disabled=false;if(document.getElementById("statMsg"))document.getElementById("statMsg").innerHTML='';if(document.forms.F)document.forms.F.J.value='Business Professional';document.forms.F.hBp.value= 2.00;document.forms.F.T.value=v;else if(V>= 50000)document.getElementById("J").innerHTML="Business Premium";document.getElementById("hBp").innerHTML="NGN 1.85";v=V* 1.85;v=v.toFixed(2);document.getElementById("T").innerHTML='NGN '+v;if(document.forms.F)document.forms.F.bJ.disabled=false;if(document.getElementById("statMsg"))document.getElementById("statMsg").innerHTML='';if(document.forms.F)document.forms.F.J.value='Business Premium';document.forms.F.hBp.value= 1.85;document.forms.F.T.value=v; </script>
<div align="left" id="pricelist">
<div align="right"><div id="price_tag"><img src="http://www.bbnplace.com/accounts_v2/style/price_tag.png" align="top" /> Bundles & Pricing</div></div>
<form id="aN" name="aN" method="post" action="">
<div style="margin: 0 0 5px;">
<label for="select"></label> <b>Currency</b> <select
name="aD" id="aD" onchange="ck(this.value)" disabled="disabled">
<option value="ngn" selected="selected" >NGN</option>
<option value="usd" >USD</option>
<option value="eur" >EUR</option>
</select>
</div>
<div>
<table cellpadding="3" cellspacing="0" id="price_tags">
<tr>
<th align="left">Bundle</th>
<th align="right">Min. Volume</th>
<th align="right">Unit Price</th>
</tr> <tr>
<td style="background-color: #FFF; white-space:nowrap; overflow:hidden; text-overflow: clip;">Teams & Groups</td>
<td align="right" style="background-color: #FFF;">50</td>
<td align="right" style="background-color: #FFF;">3.50</td>
</tr> <tr>
<td style="background-color: #FAFAFA; white-space:nowrap; overflow:hidden; text-overflow: clip;">Business Standard</td>
<td align="right" style="background-color: #FAFAFA;">1,000</td>
<td align="right" style="background-color: #FAFAFA;">2.20</td>
</tr> <tr>
<td style="background-color: #FFF; white-space:nowrap; overflow:hidden; text-overflow: clip;">Business Professional</td>
<td align="right" style="background-color: #FFF;">10,000</td>
<td align="right" style="background-color: #FFF;">2.00</td>
</tr> <tr>
<td style="background-color: #FAFAFA; white-space:nowrap; overflow:hidden; text-overflow: clip;">Business Premium</td>
<td align="right" style="background-color: #FAFAFA;">50,000</td>
<td align="right" style="background-color: #FAFAFA;">1.85</td>
</tr> </table>
</div>
<div style="margin: 30px 0;">
Specify sms volume in the space below to get pricing <br />
<div>
<table border="0" cellpadding="2" cellspacing="0"
id="aH">
<tr>
<th >Volume</th>
<th >Bundle</th>
<th align="right">Unit Price</th>
<th align="right">Price</th>
</tr>
<tr>
<td bgcolor="#FEFEFE"><input name="df" type="text" value=""
size="10" maxlength="10" onkeyup="dJ()" /></td>
<td bgcolor="#FEFEFE"><div id="J">
<input name="J" type="hidden" id="J" value="" />
</div></td>
<td align="right"><div id="hBp">
<input name="hBp" type="hidden" id="hBp" value="" />
</div></td>
<td align="right"><div id="T"><a name="how_to_recharge"></a>
<input name="T" type="hidden" id="T" value="" />
</div></td>
</tr>
</table>
</div>
</div>
</form>
IMPORTANT: *<strong>Business Premium bundle</strong> is the only <span
style="color: red; font-weight: bold;">negotiable</span> bundle.
</div><div align="left" style="margin: 50px 0 0;">
<h2>How to Recharge</h2>
<p>Pay online with any Nigerian debit/credit card, or at any branch of the listed banks:</p>
<div>
<img src="http://www.bbnplace.com/checkout/image/webpaymentgateways.gif" />
</div>
<div style="font-size: 14px;">
Account Name: <b>Browser Based Nigeria</b>
</div>
<div style="clear: both; width: 100%;">
<div id="bM" align="center">
<img src="http://www.bbnplace.com/sms/media/images/zenith.jpeg"
/><br />1012259075
</div>
<div id="bM" align="center">
<img src="http://www.bbnplace.com/sms/media/images/gtblogo.gif"
/><br />0008382123
</div>
<div id="bM" align="center">
<img src="http://www.bbnplace.com/sms/media/images/diamondbank.jpg"
/><br />0010549507
</div>
<div id="ibM" align="center">
<img src="http://www.bbnplace.com/sms/media/images/accessbank_logo.png"
style="padding: 5px 0;" /><br />0049632011</div>
</div>
</div></div></div>
<div id="form_pane">
<div id="form_panel"><div align="left" id="signup">
<form name="signup_init" method="post" onsubmit = "return false;" autocomplete="off">
<div align="right"><h2>Get Started!</h2></div>
<div id="signup_init_error_message"></div>
<div><b>Email</b></div>
<div>
<input name="new_user_email" type="email" id="new_user_email" onkeypress="quickSubmit(event, 'signup_init', 'signup_init_error_message')" title="Type Email" size="45" style="width:100%" />
<span style="display: none;">User Email</span>
</div>
<div>
<input type="hidden" name="validemail" value="0" />
<input type="hidden" name="bservice" value="prepaidsms" />
<input type="button" name="button2" id="aC" value="Sign Up" onclick="processFrm('signup_init', 'signup_init_error_message');" />
</div>
</form>
</div></div>
<div id="forms_seperator">OR</div>
<div id="form_panel"><div>
<form name="blogin" method="post" onsubmit="return false;" autocomplete="off">
<div align="right"> <h2>Login</h2> </div>
<div id="errormsg"></div>
<div align="left">Email<br />
<label for="username"></label>
<input type="email" name="busername" id="busername" class="frmfield" value="" onkeypress="quickSubmit(event, 'blogin', 'errormsg')" title="This should be your email" />
</div>
<div align="left">Password<br />
<input type="password" name="passwd" id="passwd" class="frmfield" value="" onkeypress="quickSubmit(event, 'blogin', 'errormsg')" />
</div>
<div>
<input type="hidden" name="bservice" id="bservice" value="prepaidsms" />
<input type="hidden" name="returnto" id="returnto" value="http://sms.bbnplace.com/sentdir.php" />
<input type="hidden" name="errordiv" id="errordiv" value="errormsg" />
<input type="hidden" name="ipaddress" id="ipaddress" value="41.58.242.131" />
</div>
<div align="left">
<label><input name="rememberme" type="checkbox" id="rememberme" value="1" /> Remember Me!</label>
<input type="button" name="button" id="button" class="K" value="Login" onclick="processFrm('blogin','errormsg')" />
</div>
<div style="padding: 5px 0;"><a href="http://www.bbnplace.com/accounts_v2?do=login_failure&service=prepaidsms&returnto=http://sms.bbnplace.com/sentdir.php">Can't access my account</a></div>
</form>
</div></div>
<div style="margin: 18px 0;">
<div style="margin: 0 0 5px;">Join our social conversation</div>
<div class="fb-like" data-href="http://www.facebook.com/bbnsms"
data-send="false" data-layout="button_count" data-
data-show-faces="false"></div>
</div>
</div>
</div></div>
<div style="clear:both;"> </div><div id="statsbar"></div>
</div>
<div id="aj" align="center"></div>
<div align="center" id="bn">
<a href="//bs.bbnplace.com" title="Business Solutions">Business
Solutions</a> | <a href="//dev.bbnplace.com">Developers</a> | <a
href="//www.bbnplace.com/documentation">Documentation</a>
| <a href="//newsroom.bbnplace.com">Newsroom</a>
| <a href="//bbnplace.wordpress.com">Blog</a> | <a
href="//www.bbnplace.com?entry=contact" target="_blank">Contact
Us</a> <a href="//twitter.com/bbnplace" title="on Twitter"
target="_blank"><img src="//www.bbnplace.com/accounts_v2/style/twitter.png"
border="0" /></a> <a
href="//www.facebook.com/bbnplace" title="on Facebook"
target="_blank"><img src="//www.bbnplace.com/accounts_v2/style/facebook.png"
border="0" /></a> <a
href="//www.linkedin.com/groups?gid=4620527" title="at LinkedIn"
target="_blank"><img src="//www.bbnplace.com/accounts_v2/style/linkedin.png"
border="0" /></a> <a
href="//www.youtube.com/user/bbnplace" title="on Youtube"
target="_blank"><img src="//www.bbnplace.com/accounts_v2/style/youtube.png"
border="0" /></a> <a
href="//newsroom.bbnplace.com" title="at the Newsroom"
target="_blank"><img src="//www.bbnplace.com/accounts_v2/style/radio.png"
border="0" /></a>
<br /> Copyright © 2008 - 2016 <a
href="//www.bbnplace.com" title="Browser Based Network Ltd">Browser
Based Network Ltd</a>. <img src="//www.bbnplace.com/accounts_v2/style/ngr.png" align="absmiddle"
/> Nigeria. All rights reserved.
<br /><a href="//docs.bbnplace.com/?article=legals" target="_blank">Terms of Service</a> • <a href="//docs.bbnplace.com/?article=privacy_policy" target="_blank">Privacy Policy Statement</a>
<br /> <b>Desktop | <a
href="//m.sms.bbnplace.com">Mobile</a> Edition
</b>
</div>
<div
style="float: right; position: fixed; z-index: 100000; top: 230px; right: 0px;">
<img src="//www.bbnplace.com/accounts_v2/style/feedback_button.png" onclick="bB()"
style="cursor: pointer;" />
</div><div id="t">
<div id="j">
<form name="t" action="return false;" method="post">
<input type="hidden" name="an" value="Mobile Messaging Solutions" />
<h1 style="color: #555;">Send Us Feedback</h1>
<div>If you notice something is not working properly or you have a
suggestions. We appreciate hearing from you</div>
<div id="V"></div>
<div>
<b>Full Name:</b><br /> <input type="text" name="ag"
style="width: 95%;" />
</div>
<div>
<b>Email:</b><br /> <input type="text" name="bI" style="width: 95%;" />
</div>
<div>
<b>Phone:</b> <i>(optional)</i><br /> <input type="text" name="aR"
style="width: 200px;" />
</div>
<div>
<b>Suggestion:</b><br />
<textarea name="av" rows="10"
style="width: 95%; min-width: 95%; max-width: 95%; height: 100px; max-height: 100px;"></textarea>
</div>
<div>
Read our <a href="http://docs.bbnplace.com/?article=privacy_policy"
target="_blank">Privacy Policy</a> statement
</div>
<div style="margin: 10px 0 0;">
<span id="aC" onclick="aN()">Submit Feedback</span> <span class="K"
onclick="ar()">Cancel</span>
</div>
</form>
</div>
<div id="h">
<div id="bv">
<img src="style/correct_marking.gif" align="absmiddle" /> Your
feedback has been received. Thank you for the time taken
</div>
<div style="margin: 50px 0 0;">
<span class="K" onclick="ar()">Close</span>
</div>
</div>
</div><div id="cs"><div style="text-align:center;white-space:nowrap;"> <div><a href="http://livechat.boldchat.com/aid/4518259610467392165/bc.chat?cwdid=2945491719484839364" target="_blank" onclick="window.open((window.cl&&cl.link||function(link)return link;)(this.href+(this.href.indexOf('?')>=0?'&':'?')+'url='+escape(document.location.href)),'Chat1609025529230970721','toolbar=0,scrollbars=1,location=0,statusbar=0,menubar=0,resizable=1,width=520,height=360,left=190,top=250');return false;"><img src="https://cbi.boldchat.com/aid/4518259610467392165/bc.cbi?cbdid=6794783422064795815" border="0"/></a></div></div></div></body>
</html>
【问题讨论】:
浏览器的“登录”状态取决于其 Cookie 标头。登录成功后,服务器会通过 Set-Cookie 标头设置该值,请确保后续请求都携带这个 Cookie。 【参考方案1】:您只需要向正确的网址发送 POST 请求,一旦登录成功,就可以获取任何您想要的页面。下面是一个使用 requests 的可用示例:
import requests
from bs4 import BeautifulSoup
# Log in by POSTing the credentials directly to the login endpoint, then
# reuse the same session (and its cookies) to fetch a page behind the login.

# Login endpoint given by the answer; note it differs from the sign-in page
# URL that the question's code submitted the form to.
post = "http://www.bbnplace.com/accounts_v2/source/loginp.php"
# Form fields the endpoint expects: "eml" carries the email, "p" the password.
data = {
    "eml": "your_email",
    "p": "your_pass",
}
# use a Session to persist cookies.
with requests.Session() as s:
    r = s.post(post, data=data)  # log us in
    print(r.content)  # will output "successful" for correct login
    r = s.get("http://sms.bbnplace.com/account.php")  # get account page
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed (and bs4 emits no warning).
    soup = BeautifulSoup(r.content, "html.parser")
    print(soup.title.text)
【讨论】:
哎呀,谢谢,要不是你,我还会一直在原地打转。以上是关于如何使用 Python 登录网站进行抓取的主要内容,如果未能解决你的问题,请参考以下文章