import requests
import re
from urllib.parse import urlencode
# Log in to lagou.com with a shared requests session so that the cookies set
# during login are reused by every later request made through `session`.
session = requests.Session()

# Browser-like User-Agent sent with every request of the login flow.
user_agent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36Name"

# NOTE(review): credentials are hard-coded in the script; the password looks
# like a pre-computed hash rather than plain text -- TODO confirm and move both
# values to configuration / environment variables.
username = '18611453110'
password = '70621c64832c4d4d66a47be6150b4a8e'

# Step 1: fetch the login page; it embeds the anti-forgery code and token that
# must be echoed back as request headers when posting the credentials.
r1 = session.get(
    "https://passport.lagou.com/login/login.html",
    headers={
        "User-Agent": user_agent,
    },
)

# The page assigns the values with plain ASCII quotes, e.g.
#   X_Anti_Forge_Code = '...'
# Tolerate optional whitespace around "=" and take the captured text (a str),
# not the list returned by re.findall: header values must be str or bytes.
forge_code_match = re.search(r"X_Anti_Forge_Code\s*=\s*'(.*?)'", r1.text, re.S)
forge_token_match = re.search(r"X_Anti_Forge_Token\s*=\s*'(.*?)'", r1.text, re.S)
if forge_code_match is None or forge_token_match is None:
    raise RuntimeError(
        "X_Anti_Forge_Code / X_Anti_Forge_Token not found on the login page"
    )
X_Anit_Forge_Code = forge_code_match.group(1)
X_Anit_Forge_Token = forge_token_match.group(1)

# Step 2: submit the credentials as an AJAX form post.
# The "X-Anit-Forge-*" header names are sent exactly as the original script
# spelled them (the same spelling is used by the delivery request further down).
r2 = session.post(
    "https://passport.lagou.com/login/login.json",
    headers={
        "User-Agent": user_agent,
        "Referer": "https://passport.lagou.com/login/login.html",
        "X-Anit-Forge-Code": X_Anit_Forge_Code,
        "X-Anit-Forge-Token": X_Anit_Forge_Token,
        "X-Requested-With": "XMLHttpRequest",
    },
    data={
        "isValidate": True,
        'username': username,
        'password': password,
        'request_form_verifyCode': '',
        'submit': '',
    },
)

# Step 3: request the service-ticket grant page after logging in.
# NOTE(review): presumably this is what authorises the session for
# www.lagou.com -- confirm against the site's login flow.
r3 = session.get(
    "https://passport.lagou.com/grantServiceTicket/grant.html",
    headers={
        "User-Agent": user_agent,
        'Referer': 'https://passport.lagou.com/login/login.html',
    },
)

# Step 4: verify the login by loading the resume page and checking that the
# account's user name appears in it (prints True or False).
r4 = session.get(
    'https://www.lagou.com/resume/myresume.html',
    headers={
        "User-Agent": user_agent,
    },
)
print(username in r4.text)
# ============================
# res = urlencode({"k":"java高级开发"},encoding="utf-8").split("=")[-1]
# url = "https://www.lagou.com/jobs/list_"+res
# r5 =session.get(url,
# headers={
# "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36Name",
# },
# params={
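# 'gj': '3年及以下',
# 'px': 'default',
# 'yx': '25k-50k',
# 'city': '北京'
# }
# ) # Following the usual approach returned no data, because the site loads the results through an AJAX request; hence the r6 request below is used instead.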
# Search job postings through the site's AJAX endpoint. The result list is
# read from the JSON response of this POST (`r6`), not from the HTML list page.
keyword = "java高级开发"

# urlencode() yields "k=<percent-encoded keyword>"; only the encoded value is
# needed to build the list-page URL that is sent as the Referer.
res = urlencode({"k": keyword}, encoding="utf-8").split("=")[-1]
url = "https://www.lagou.com/jobs/list_" + res

r6 = session.post(
    # Endpoint spelling fixed from "postionAjax.json".
    # NOTE(review): confirm against the request the browser actually sends.
    'https://www.lagou.com/jobs/positionAjax.json',
    headers={
        'Referer': url,
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36Name",
    },
    # Form body: first page of results for the keyword.
    data={
        "first": True,
        "pn": 1,
        "kd": keyword,
    },
    # Query-string filters.
    params={
        "gj": "3年及以下",
        # Was "gx"; the list-page request kept in the comments above uses
        # "px" for the same 'default' value.
        "px": "default",
        "yx": "15k-25k",
        "city": "北京",
        # NOTE(review): the key is kept exactly as the original wrote it; it
        # may be a misspelling of the site's parameter name -- verify.
        "needAddtionResult": False,
        "isSchoolJob": 0,
    },
)
from pprint import pprint
# print(r6.json())
# Walk through every posting returned by the search request (`r6`), print a
# short summary, then submit the logged-in account's resume to it.
comapines_list = r6.json()['content']['positionResult']['result']

# Loop invariants: the User-Agent used for the detail/delivery requests and the
# patterns that pull the anti-forgery values out of a posting's detail page.
deliver_user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
forge_token_pattern = re.compile(r"X_Anti_Forge_Token = '(.*?)'", re.S)
forge_code_pattern = re.compile(r"X_Anti_Forge_Code = '(.*?)'", re.S)

for comapiny in comapines_list:
    positionId = comapiny['positionId']
    company_link = 'https://www.lagou.com/jobs/{pos_id}.html'.format(pos_id=positionId)
    companyShortName = comapiny['companyShortName']
    positionName = comapiny['positionName']
    salary = comapiny['salary']

    # Same text as the original multi-line literal: a leading newline, one
    # labelled field per line, and a trailing newline.
    print(
        '\n'
        '详情连接:%s\n'
        '公司名:%s\n'
        '职位名:%s\n'
        '薪资:%s\n'
        % (company_link, companyShortName, positionName, salary)
    )

    # The detail page carries a fresh anti-forgery token/code pair that has to
    # accompany the delivery request for this posting.
    r7 = session.get(
        company_link,
        headers={
            'User-Agent': deliver_user_agent,
        },
    )
    token_match = forge_token_pattern.search(r7.text)
    code_match = forge_code_pattern.search(r7.text)
    if token_match is None or code_match is None:
        # Without both values the delivery cannot be sent; skip this posting
        # instead of aborting the whole run with an IndexError.
        print('%s 投递失败' % (companyShortName))
        continue
    X_Anti_Forge_Token = token_match.group(1)
    X_Anti_Forge_Code = code_match.group(1)

    # Submit the resume for this posting.
    session.post(
        'https://www.lagou.com/mycenterDelay/deliverResumeBeforce.json',
        headers={
            'User-Agent': deliver_user_agent,
            'Referer': company_link,
            'X-Anit-Forge-Code': X_Anti_Forge_Code,
            'X-Anit-Forge-Token': X_Anti_Forge_Token,
            'X-Requested-With': 'XMLHttpRequest',
        },
        data={
            'positionId': positionId,
            'type': 1,
            'force': True,
        },
    )
    # NOTE(review): the response of the delivery request is not inspected, so
    # this message only means the request was sent without raising.
    print('%s 投递成功' % (companyShortName))