0x00案例 爬取博客园某页面的所有图片并下载到本地
连续搞了一周的 Python,收获颇多,主要还是锻炼了自己的脾气……话不多说,先贴上脚本:
# coding: utf-8
"""Download every PNG image referenced by one cnblogs page.

The script fetches PAGE_URL, extracts all absolute ``.png`` URLs from the
HTML, records them in RESULT_FILE (one per line) and saves each image into
the current working directory.

Differences from the original one-line paste:

* plain ASCII quotes and real newlines, so the script actually parses;
* the regex matches a literal dot (not a backslash) and stops at the first
  ``.png`` instead of greedily swallowing several URLs on one HTML line;
* ``https`` image URLs are matched as well as ``http`` ones;
* RESULT_FILE is rewritten on each run instead of appended to, so re-runs
  no longer accumulate and re-download duplicate entries;
* file names no longer end in ``.png.png`` and URLs without three
  dash-separated fields no longer raise IndexError;
* HTTP responses are closed after reading.

Runs on Python 2 (as originally written) and Python 3.
"""
import contextlib
import io
import re

try:
    import urllib2 as urlrequest  # Python 2, used by the original script
except ImportError:
    import urllib.request as urlrequest  # Python 3 home of the same API

PAGE_URL = "https://www.cnblogs.com/peterpan0707007/p/7620048.html"
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:59.0) '
                  'Gecko/20100101 Firefox/59.0',
}
RESULT_FILE = 'result.txt'

# Non-greedy and restricted to characters that cannot appear unescaped in an
# HTML attribute value, so each match is exactly one URL.
PNG_URL_RE = re.compile(r'''https?://[^\s"'<>]+?\.png''')


def find_png_urls(html):
    """Return the distinct PNG URLs found in *html*, in first-seen order."""
    seen = set()
    urls = []
    for url in PNG_URL_RE.findall(html):
        if url not in seen:
            seen.add(url)
            urls.append(url)
    return urls


def filename_for(url):
    """Return the local file name under which the image at *url* is saved.

    Keeps the original naming scheme -- the third dash-separated field of
    the image name, e.g. ``1063309-20171002103504099-1830044640.png`` ->
    ``1830044640.png`` -- and falls back to the URL's last path component
    when the name has fewer than three fields.
    """
    basename = url.rstrip('/').rsplit('/', 1)[-1]
    # Drop the extension first so it is not appended twice below.
    stem = basename[:-len('.png')] if basename.endswith('.png') else basename
    fields = stem.split('-')
    if len(fields) >= 3:
        stem = fields[2]
    return stem + '.png'


def fetch(url):
    """Return the raw body of *url* as bytes, sending the browser-like HEADERS."""
    request = urlrequest.Request(url, headers=HEADERS)
    # closing() because the Python 2 response object is not a context manager.
    with contextlib.closing(urlrequest.urlopen(request)) as response:
        return response.read()


def main():
    """Fetch the page, record the PNG URLs, and download each image."""
    html = fetch(PAGE_URL).decode('utf-8', 'replace')
    urls = find_png_urls(html)

    # Keep the URL list on disk as a record of what was downloaded.
    with io.open(RESULT_FILE, 'w', encoding='utf-8') as record:
        record.writelines(url + u'\n' for url in urls)

    for url in urls:
        # Download before opening the target so a failed request does not
        # leave an empty image file behind.
        data = fetch(url)
        with open(filename_for(url), 'wb') as image_file:
            image_file.write(data)

    print('done')


if __name__ == '__main__':
    main()
运行结果