011 python接口 bs4提取结果

Posted 汁虫

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了011 python接口 bs4提取结果相关的知识,希望对你有一定的参考价值。

 

'''
时间:2018/11/03
功能:bs4提取结果
目录:
    一: 登录拉勾网
'''

 

一: 登录拉勾网
# coding:utf-8
import requests
import re
from bs4 import BeautifulSoup
import urllib3
import hashlib
urllib3.disable_warnings()
import sys

class LoginLgw:
    """Log in to lagou.com through a caller-supplied session object.

    The session is stored on ``self.s`` and used for every HTTP request the
    class makes, so headers set by one step are visible to the next.
    """

    def __init__(self, s):
        """
        :param s: session object; the methods below call ``s.headers.update``,
                  ``s.get`` and ``s.post`` on it
        """
        self.s = s

    def getToeknCode(self):
        """
        Fetch the login page and extract the anti-forgery token and code.

        :return: ``{"X_Anti_Forge_Token": "xxx", "X_Anti_Forge_Code": "xxx"}``;
                 both values are empty strings when extraction fails
        """
        # Update the session headers before requesting the login page.
        url = "https://passport.lagou.com/login/login.html"
        head = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:44.0) Gecko/20100101 Firefox/44.0",
        }
        self.s.headers.update(head)

        # Certificate verification is disabled for this request.
        data = self.s.get(url, verify=False)
        soup = BeautifulSoup(data.content, "html.parser", from_encoding="utf-8")

        # The token/code pair is read from the second <script> tag of the page.
        scripts = soup.find_all("script")
        script_text = scripts[1].get_text() if len(scripts) > 1 else ""

        # First match of each pattern, or None when the page layout differs.
        token = re.search(r"Token = '(.+?)'", script_text)
        code = re.search(r"Code = '(.+?)'", script_text)
        if token is None or code is None:
            # Best-effort fallback: report the failure and hand back empty
            # values instead of raising, so the caller can still proceed.
            print("Get Faild")
            return {"X_Anti_Forge_Token": "", "X_Anti_Forge_Code": ""}

        return {
            "X_Anti_Forge_Token": token.group(1),
            "X_Anti_Forge_Code": code.group(1),
        }

    def encryptPwd(self, passwd):
        """
        Hash the password the way the login form expects it.

        :param passwd: plain-text password
        :return:       32-character hex digest: md5("veenike" + md5(passwd) + "veenike")
        """
        inner = hashlib.md5(passwd.encode("utf-8")).hexdigest()
        # "veenike" is a fixed value hard-coded in the site's JS file.
        salted = "veenike" + inner + "veenike"
        return hashlib.md5(salted.encode("utf-8")).hexdigest()

    def login(self, user, password):
        """
        Log in to lagou.com.

        :param user:        user name
        :param password:    plain-text password
        :return:            decoded JSON response, or ``None`` when the
                            response body is not valid JSON
        """
        # Fetch the anti-forgery token and code.
        tokenCode = self.getToeknCode()
        print(tokenCode)

        # Hash the password.
        # NOTE(review): this prints the password digest to stdout; kept
        # because the original script emits it, but consider removing it.
        password = self.encryptPwd(password)
        print(password)

        # Update the session headers for the JSON login endpoint.
        # NOTE(review): the "X-Anit-Forge-*" spelling is kept exactly as in
        # the original script; confirm against the site before "correcting" it.
        url = "https://passport.lagou.com/login/login.json"
        head = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:44.0) Gecko/20100101 Firefox/44.0",
            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
            "X-Requested-With": "XMLHttpRequest",
            "X-Anit-Forge-Token": tokenCode["X_Anti_Forge_Token"],
            "X-Anit-Forge-Code": tokenCode["X_Anti_Forge_Code"],
            "Referer": "https://passport.lagou.com/login/login.html",
        }
        self.s.headers.update(head)

        # Submit the login form.
        body = {
            "isValidate": "true",
            "username": user,
            "password": password,
            "request_form_verifyCode": "",
            "submit": "",
        }
        r = self.s.post(url, data=body, verify=False)
        print(r.text)
        try:
            return r.json()
        except ValueError:
            # JSON decoding errors are ValueError subclasses; anything else
            # is a real bug and is allowed to propagate.
            print("login faild: %s" % r.text)
            return None

if __name__ == "__main__":
    # Demo run: open a fresh requests session and attempt a login with
    # placeholder credentials.
    LoginLgw(requests.session()).login("user", "password")
{'X_Anti_Forge_Token': 'a2724c9e-40be-493b-84a9-227a3cafe955', 'X_Anti_Forge_Code': '77786694'}
d45ef25791078e956e6915ba194d776a
{"content":{"rows":[]},"message":"操作成功","state":1,"submitCode":76585064,"submitToken":"1e756b35-4bbe-4853-b1b8-767042f86771"}

 

 

以上是关于011 python接口 bs4提取结果的主要内容,如果未能解决你的问题,请参考以下文章

Python页面解析和数据提取bs4

Python爬虫 BeautifulSoup(bs4)-- bs4介绍、安装bs4、bs4基础语法

Python爬虫 BeautifulSoup(bs4)-- bs4介绍、安装bs4、bs4基础语法

python爬虫入门

Python 爬虫--数据解析(bs4方法)

python接口自动化测试十八:使用bs4框架爬取图片