python爬取猫眼代码没
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了python爬取猫眼代码没相关的知识,希望对你有一定的参考价值。
# -*- coding: utf-8 -*-
"""
Created on Mon Jul 16 09:52:37 2018
@author: HJY
"""
#猫眼电影排行爬取
import requests
import re
import json
import time
from requests.exceptions import RequestException
def get_one_page(url, timeout=10):
    """Download a page and return its HTML text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as text when the server answers with HTTP 200;
        ``None`` for any other status code or when the request raises a
        ``RequestException`` (which includes timeouts).
    """
    # A browser-like User-Agent header is sent with every request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (Khtml, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
    }
    try:
        # Without an explicit timeout, requests waits indefinitely on a
        # server that accepts the connection but never responds.
        response = requests.get(url, headers=headers, timeout=timeout)
    except RequestException:
        return None
    if response.status_code == 200:
        return response.text
    return None
# Captures, for each <dd>...</dd> entry: rank, title, cast text, release text,
# and the integer and fractional parts of the score.  re.S lets '.' match
# newlines so a single entry may span several lines of markup.
_MOVIE_PATTERN = re.compile(
    r'<dd>.*?board-index.*?>(.*?)</i>.*?name.*?a.*?>(.*?)</a>.*?star.*?>(.*?)</p>.*?releasetime.*?>(.*?)</p>.*?integer.*?>(.*?)</i>.*?fraction.*?>(.*?)</i>.*?</dd>',
    re.S,
)


def parse_one_page(html):
    """Yield one dict per movie entry found in a board page.

    Args:
        html: Page markup as a string.  ``None`` or an empty string (for
            example after a failed download) yields nothing instead of
            raising ``TypeError`` inside the regex engine.

    Yields:
        A dict with the keys '排名', '片名', '主演', '上映时间' and '评分',
        all string values taken from the regex capture groups.
    """
    if not html:
        return
    for rank, title, star, release, integer, fraction in _MOVIE_PATTERN.findall(html):
        yield {
            '排名': rank,
            '片名': title,
            # Drops the first three characters of the stripped cast text
            # (presumably the "主演：" label - confirm against live markup).
            '主演': star.strip()[3:],
            # Drops the first five characters of the release text
            # (presumably the "上映时间：" label - confirm against live markup).
            '上映时间': release[5:],
            # The score is split into integer and fraction elements in the page.
            '评分': integer + fraction,
        }
def write_to_file(content, path='result.txt'):
    """Append one record to a text file as a single JSON line.

    Args:
        content: JSON-serialisable object to store.
        path: File to append to; defaults to ``result.txt`` in the current
            working directory.
    """
    with open(path, 'a', encoding='utf-8') as f:
        # ensure_ascii=False keeps non-ASCII text readable in the file
        # instead of emitting \uXXXX escapes.  Each record ends with a real
        # newline so the file holds one JSON document per line.
        f.write(json.dumps(content, ensure_ascii=False) + '\n')
def main(offset):
    """Scrape one board page, then print and store every entry on it.

    Args:
        offset: Value appended to the ``offset`` query parameter of the
            board URL.
    """
    import sys

    url = 'http://maoyan.com/board/4?offset=' + str(offset)
    html = get_one_page(url)
    if html is None:
        # get_one_page returns None on a non-200 status or a request error;
        # report it and skip the page rather than feeding None to the parser.
        print('failed to fetch ' + url, file=sys.stderr)
        return
    for item in parse_one_page(html):
        print(item)
        write_to_file(item)
if __name__ == '__main__':
    # Ten pages with offsets 0, 10, ..., 90, pausing one second after each.
    for i in range(10):
        main(offset=i * 10)
        time.sleep(1)

# Forum replies that the scraped page had fused onto the last code line:
#   Reference answer A: Yes, ready-made code exists; contact required.
#   Reference answer B: Yes - 1036482191
以上是关于python爬取猫眼代码没的主要内容,如果未能解决你的问题,请参考以下文章