011 python接口 bs4提取结果
Posted 汁虫
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了011 python接口 bs4提取结果相关的知识,希望对你有一定的参考价值。
'''
时间:2018/11/03
功能:bs4提取结果
目录:
    一: 登录拉勾网
'''
一: 登录拉勾网
# coding:utf-8
import hashlib
import re
import sys

import requests
import urllib3
from bs4 import BeautifulSoup

# Every request below is sent with verify=False; silence the warnings that
# urllib3 would otherwise emit for each unverified HTTPS request.
urllib3.disable_warnings()

_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:44.0) Gecko/20100101 Firefox/44.0"
_LOGIN_PAGE_URL = "https://passport.lagou.com/login/login.html"
_LOGIN_API_URL = "https://passport.lagou.com/login/login.json"
# Per the original author's note: a value hard-coded in the site's JS file.
_PASSWORD_SALT = "veenike"


class LoginLgw:
    """Log in to lagou.com through a caller-supplied session object."""

    def __init__(self, s):
        """Store the session.

        :param s: session object used for every request (the script below
            passes ``requests.session()``).
        """
        self.s = s

    def getToeknCode(self):
        """Fetch the login page and scrape the anti-forgery token and code.

        The values are read from the text of the second ``<script>`` element
        of the login page.

        :return: ``{"X_Anti_Forge_Token": "xxx", "X_Anti_Forge_Code": "xxx"}``;
            both values are empty strings when they cannot be found.
        """
        # Update the session headers before requesting the page.
        self.s.headers.update({"User-Agent": _USER_AGENT})

        page = self.s.get(_LOGIN_PAGE_URL, verify=False)
        soup = BeautifulSoup(page.content, "html.parser", from_encoding="utf-8")

        try:
            script_text = soup.find_all("script")[1].get_text()
            token = re.findall(r"Token = '(.+?)'", script_text)[0]
            code = re.findall(r"Code = '(.+?)'", script_text)[0]
        except IndexError:
            # Fewer than two <script> elements, or a pattern did not match.
            print("Get Faild")
            token = code = ""
        return {"X_Anti_Forge_Token": token, "X_Anti_Forge_Code": code}

    def encryptPwd(self, passwd):
        """Hash the password the way the login form expects.

        Computes ``md5(salt + md5(passwd) + salt)`` and returns the hex digest.

        :param passwd: plain-text password
        :return: hex digest of the salted double MD5
        """
        inner = hashlib.md5(passwd.encode("utf-8")).hexdigest()
        salted = _PASSWORD_SALT + inner + _PASSWORD_SALT
        return hashlib.md5(salted.encode("utf-8")).hexdigest()

    def login(self, user, password):
        """Log in to lagou.com.

        :param user: user name
        :param password: plain-text password
        :return: the decoded JSON response, or ``None`` when the response
            body is not valid JSON.
        """
        # Scrape the anti-forgery token and code from the login page.
        tokenCode = self.getToeknCode()
        print(tokenCode)

        # Hash the password.
        password = self.encryptPwd(password)
        print(password)

        # Update the session headers for the login call.
        # NOTE: the "Anit" spelling in the header names is kept exactly as in
        # the original script.
        self.s.headers.update({
            "User-Agent": _USER_AGENT,
            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
            "X-Requested-With": "XMLHttpRequest",
            "X-Anit-Forge-Token": tokenCode['X_Anti_Forge_Token'],
            "X-Anit-Forge-Code": tokenCode['X_Anti_Forge_Code'],
            "Referer": _LOGIN_PAGE_URL,
        })

        # Submit the login form.
        body = {
            "isValidate": 'true',
            "username": user,
            "password": password,
            "request_form_verifyCode": "",
            "submit": "",
        }
        r = self.s.post(_LOGIN_API_URL, data=body, verify=False)
        print(r.text)
        try:
            return r.json()
        except ValueError:
            # JSON decoding errors are ValueError subclasses.
            print("login faild: %s" % r.text)
            return None


if __name__ == "__main__":
    s = requests.session()
    lgw = LoginLgw(s)
    lgw.login("user", "password")
{'X_Anti_Forge_Token': 'a2724c9e-40be-493b-84a9-227a3cafe955', 'X_Anti_Forge_Code': '77786694'}
d45ef25791078e956e6915ba194d776a
{"content":{"rows":[]},"message":"操作成功","state":1,"submitCode":76585064,"submitToken":"1e756b35-4bbe-4853-b1b8-767042f86771"}
以上是关于011 python接口 bs4提取结果的主要内容,如果未能解决你的问题,请参考以下文章
Python爬虫 BeautifulSoup(bs4)-- bs4介绍安装bs4bs4基础语法