Python爬虫:抓取单个图片/多个图片
Posted qq_51102350
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Python爬虫:抓取单个图片/多个图片相关的知识,希望对你有一定的参考价值。
抓取单个图片(代码分为 pics.py 和 main.py 两个文件,运行前需安装 requests、beautifulsoup4 和 lxml):
#pics.py
"""this is for catching pictures"""
import requests
import re
from bs4 import BeautifulSoup
def catch_onepicture(url, headers):
    """Return the image URL of the main photo on a photo detail page.

    The page at *url* is downloaded and parsed with the ``lxml`` parser.
    The first ``<a class="mainphoto">`` element is located and the ``src``
    attribute of the ``<img width="457">`` inside it is returned.

    Args:
        url: Address of the HTML page to fetch.
        headers: HTTP headers sent with the request (e.g. ``user-agent``).

    Returns:
        The ``src`` attribute value of the matched image.

    Raises:
        requests.HTTPError: If the server responds with an error status.
        IndexError: If the page contains no matching anchor or image.
    """
    # A timeout keeps a stalled connection from blocking the script forever.
    response = requests.get(url=url, headers=headers, timeout=10)
    # Fail here instead of trying to parse an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')
    anchor = soup.find('a', class_='mainphoto')
    image = anchor.find('img', width='457') if anchor is not None else None

    # Read the attribute from the parsed tag rather than running a regex over
    # the re-serialized markup: serialization escapes characters such as "&"
    # and fixes the attribute order, which made the regex fragile.
    src = image.get('src') if image is not None else None
    if not src:
        # IndexError is kept because that is what the lookup raised before.
        raise IndexError(f"no main photo image found at {url!r}")
    return src
#main.py
import requests
import pics
# Browser-like request headers, sent with both the page and image requests.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36',
}
url = 'https://movie.douban.com/photos/photo/2638827823/#title-anchor'

link = pics.catch_onepicture(url, headers)
print(link)

# Download first and check the status so that an error response is never
# written to disk as if it were the picture, and no empty file is left
# behind when the request fails.
image_response = requests.get(link, headers=headers, timeout=10)
image_response.raise_for_status()

# NOTE(review): the bytes are saved unchanged; the ".ico" name does not
# convert the image format. Confirm the extension matches what is served.
with open('image1.ico', 'wb') as file:
    file.write(image_response.content)
抓取多个图片(示例:依次下载豆瓣电影 Top250 第一页中宽度为 100 的 25 张图片):
#pics.py
"""this is for catching pictures"""
import requests
import re
from bs4 import BeautifulSoup
def catch_onepicture(url, headers, i):
    """Return the ``src`` of the *i*-th ``<img width="100">`` on a page.

    The page at *url* is downloaded on every call, parsed with the ``lxml``
    parser, and all ``<img>`` tags whose ``width`` attribute is ``100`` are
    collected in document order.

    Args:
        url: Address of the HTML page to fetch.
        headers: HTTP headers sent with the request (e.g. ``user-agent``).
        i: Index into the list of matching images.

    Returns:
        The ``src`` attribute value of the selected image.

    Raises:
        requests.HTTPError: If the server responds with an error status.
        IndexError: If there is no image at index *i* or it has no ``src``.
    """
    # A timeout keeps a stalled connection from blocking the script forever.
    response = requests.get(url=url, headers=headers, timeout=10)
    # Fail here instead of trying to parse an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')
    images = soup.find_all('img', width='100')

    try:
        image = images[i]
    except IndexError:
        raise IndexError(
            f"image index {i} out of range: "
            f"{len(images)} matching image(s) at {url!r}"
        ) from None

    # Read the attribute from the parsed tag rather than running a regex over
    # the re-serialized markup: serialization escapes characters such as "&",
    # and the old pattern also required an empty class and a fixed attribute
    # order.
    src = image.get('src')
    if not src:
        # IndexError is kept because that is what the lookup raised before.
        raise IndexError(f"image {i} at {url!r} has no src attribute")
    return src
#main.py
import os
import time

import requests

import pics

# Browser-like request headers, sent with both the page and image requests.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36',
}
url = 'https://movie.douban.com/top250'

# open() does not create missing directories, so make sure the target exists.
output_dir = "D:/imageFromSpiders"
os.makedirs(output_dir, exist_ok=True)

for i in range(25):
    link = pics.catch_onepicture(url, headers, i=i)
    print(link)

    # Download first and check the status so that an error response is never
    # written to disk as if it were the picture.
    image_response = requests.get(link, headers=headers, timeout=10)
    image_response.raise_for_status()

    # NOTE(review): the bytes are saved unchanged; the ".ico" name does not
    # convert the image format. Confirm the extension matches what is served.
    file_name = f"{output_dir}/image{i+1}.ico"
    with open(file_name, 'wb') as file:
        file.write(image_response.content)

    # Pause between downloads to avoid sending requests in a rapid burst.
    time.sleep(1)
以上是关于Python爬虫:抓取单个图片/多个图片的主要内容,如果未能解决你的问题,请参考以下文章
[Python爬虫] 之二十六:Selenium +phantomjs 利用 pyquery抓取智能电视网站图片信息