python+selenium+phantomjs爬百度美女图片

Posted 2020-09-25

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了python+selenium+phantomjs爬百度美女图片相关的知识，希望对你有一定的参考价值。

# coding: utf-8
import unittest
from selenium import webdriver
from urllib.request import *
import re
import time
from bs4 import BeautifulSoup

#测试类
class baidupic(unittest.TestCase):
    #初始化测试
    def setUp(self):
        self.dv = webdriver.PhantomJS()
    #测试方法
    def test_getPic(self):
        dv = self.dv
        dv.get("http://image.baidu.com/")
        dv.find_element_by_id("kw").send_keys("美女")
        dv.find_element_by_class_name("s_btn").click()
        time.sleep(1)
        #滚轮到最下面，滚动的次数越多，下载的美女图片就越多
        js = "window.scrollTo(0, document.body.scrollHeight)"
        dv.execute_script(js)
        time.sleep(1)
        dv.execute_script(js)
        time.sleep(1)
        #正则获取图片地址，宽度，高度，后缀
        pattern = re.compile(u‘data-objurl="(.*?)" data-thumburl=".*?" data-fromurl=".*?" data-fromurlhost=".*?" data-ext="(.*?)" data-saved=".*?" data-pi=".*?" data-specialtype=".*?" data-cs=".*?" data-width="(.*?)" data-height="(.*?)" data-hostname=‘,re.S)
        items = re.findall(pattern,dv.page_source)
        
        index = 1
        for item in items:
            print("图片地址：%s\r\n类型：%s\r\n宽度：%s\r\n高度：%s\r\n " % (item[0],item[1],item[2],item[3]))
            try:
                self.saveImg(item[0],"d:\\mm\\%s.%s"%(index,item[1]))
            except:
                continue
            index = index + 1

    #保存图片到本地
    def saveImg(self,imgURL,fileName):
        img = urlopen(imgURL)
        data = img.read()
        f = open(fileName,"wb")
        f.write(data)
        f.close()

    #结束测试
    def tearDown(self):
        self.dv.quit()

以上是关于python+selenium+phantomjs爬百度美女图片的主要内容，如果未能解决你的问题，请参考以下文章