多线程批量下载远程图片

Posted 东歌

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了多线程批量下载远程图片相关的知识,希望对你有一定的参考价值。

Python 多线程的常见使用场景:多线程采集、性能测试等。

数据库驱动类-简单封装下

mysqlDriver.py

#!/usr/bin/python3
#-*- coding: utf-8 -*-
# author:zhouchao
# mysql 驱动模型类
import pymysql;
import traceback; 

class mysqlDriver:
    """Small convenience wrapper around a pymysql connection.

    SQL statements use ``?`` as a positional placeholder; ``bindParams``
    expands each placeholder into an SQL literal before the statement is
    executed.

    Every query method closes the connection when it finishes (as the
    original implementation did). The next query transparently reconnects
    with the stored settings, so several queries can be issued after a single
    ``connect()`` call.
    """

    # Connection defaults; connect() overrides them when host and user are given.
    host = "localhost"
    user = "root"
    password = ""
    database = "fitcmoe_boke"
    charset = "GBK"
    # Placeholder value until connect() stores a live connection object.
    db = "false"

    def connect(self, host="", user="", password="", database="", charset=""):
        """Open a connection and store it on ``self.db``.

        When both ``host`` and ``user`` are given, all five settings are
        stored on the instance and used; otherwise the current settings
        (class defaults or the values from an earlier call) are used.
        """
        if host and user:
            self.host = host
            self.user = user
            self.password = password
            self.database = database
            self.charset = charset
        # Do not leak a connection that is still open from an earlier call.
        self._close(self.db)
        self.db = pymysql.connect(
            host=self.host,
            user=self.user,
            password=self.password,
            database=self.database,
            charset=self.charset,
        )

    def select(self, sql, params=None):
        """Run a query and return all rows (``cursor.fetchall()``)."""
        return self._read(sql, params, fetch_all=True)

    def find(self, sql, params=None):
        """Run a query and return the first row (``cursor.fetchone()``)."""
        return self._read(sql, params, fetch_all=False)

    def add(self, sql, params=None):
        """Run an INSERT and return the connection's last insert id.

        Returns ``None`` (after rolling back) if the statement fails.
        """
        return self._write(sql, params, want_insert_id=True)

    def save(self, sql, params=None):
        """Run an UPDATE and return the value of ``cursor.execute``.

        Returns ``None`` (after rolling back) if the statement fails.
        """
        return self._write(sql, params)

    def delete(self, sql, params=None):
        """Run a DELETE and return the value of ``cursor.execute``.

        Returns ``None`` (after rolling back) if the statement fails.
        """
        return self._write(sql, params)

    def bindParams(self, sql, params):
        """Replace each ``?`` in ``sql`` with the next value from ``params``.

        Rendering rules, applied in this order:

        * ``None`` becomes ``NULL``;
        * a list or tuple becomes ``('a','b',...)`` with every item quoted;
        * a value whose text is all digits becomes a bare integer;
        * anything else becomes a single-quoted string.

        Quotes and backslashes inside quoted values are backslash-escaped so
        a value cannot terminate its own literal. Every ``?`` character is
        treated as a placeholder, including one inside a quoted SQL string.

        Raises:
            IndexError: if ``sql`` has more placeholders than ``params`` has
                values.
        """
        pieces = []
        used = 0
        for char in sql:
            if char != "?":
                pieces.append(char)
                continue
            if used >= len(params):
                raise IndexError(
                    "SQL has more '?' placeholders than supplied parameters"
                )
            pieces.append(self._literal(params[used]))
            used += 1
        return "".join(pieces)

    def file_put_contents(self, fileName, content, modeName=''):
        """Write ``content`` to ``fileName`` (used for dumping SQL to disk).

        ``modeName`` selects how the text is written:

        * ``'pappend'`` - put ``content`` in front of the existing text;
        * ``'append'``  - add ``content`` after the existing text;
        * anything else - overwrite the file.
        """
        if modeName == 'pappend':
            # Opening with "r+" would overwrite the first characters instead
            # of prepending, so read the old text and rewrite the whole file.
            try:
                with open(fileName, "r") as source:
                    existing = source.read()
            except FileNotFoundError:
                existing = ""
            with open(fileName, "w") as target:
                target.write(content + existing)
            return

        mode = "a+" if modeName == 'append' else "w+"
        with open(fileName, mode) as target:
            target.write(content)

    # ----------------------------------------------------------- helpers --

    @staticmethod
    def _close(conn):
        """Close ``conn`` if it is a connection that is still open."""
        if getattr(conn, "open", False):
            conn.close()

    def _connection(self):
        """Return an open connection, reconnecting if the last one was closed."""
        if not getattr(self.db, "open", False):
            self.connect()
        return self.db

    def _read(self, sql, params, fetch_all):
        """Execute a read query and always close the connection afterwards."""
        if params:
            sql = self.bindParams(sql, params)
        conn = self._connection()
        try:
            cursor = conn.cursor()
            cursor.execute(sql)
            return cursor.fetchall() if fetch_all else cursor.fetchone()
        finally:
            self._close(conn)

    def _write(self, sql, params, want_insert_id=False):
        """Execute a write statement inside a commit/rollback envelope."""
        if params:
            sql = self.bindParams(sql, params)
        conn = self._connection()
        try:
            cursor = conn.cursor()
            affected = cursor.execute(sql)
            # The insert id is read before commit, as the original code did.
            result = conn.insert_id() if want_insert_id else affected
            conn.commit()
            return result
        except Exception:
            # Roll back on any failure and report it instead of raising.
            print("发生异常", "involid level")
            traceback.print_exc()
            if getattr(conn, "open", False):
                conn.rollback()
            return None
        finally:
            self._close(conn)

    @staticmethod
    def _quote(value):
        """Return ``value`` as a single-quoted, backslash-escaped SQL string."""
        text = str(value).replace("\\", "\\\\").replace("'", "\\'")
        return "'" + text + "'"

    def _literal(self, value):
        """Render one bound parameter as an SQL literal (see bindParams)."""
        if value is None:
            return "NULL"
        if isinstance(value, (list, tuple)):
            return "(" + ",".join(self._quote(item) for item in value) + ")"
        if str(value).isdigit():
            return str(int(value))
        return self._quote(value)



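# Usage notes (open question: exception handling still needs review)
# "?" marks a parameter placeholder
# A "?" value may be a list or a number, e.g. [1,3,5,7] or 3
# Parameters are bound positionally, in the order they are listed

# Basic usage
# mysqlD = mysqlDriver();
# mysqlD.connect();
# data = mysqlD.select("select * from t_article where id in ? limit ? ",[[33,34,35,38],3]);
# data = mysqlD.delete("delete from t_article where aid in ?",[[43,44,270,280]]);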



    

文件下载函数

file_get_contents.py

#!/usr/bin/python3
#-*- coding: utf-8 -*-
# author:zhouchao
# 文件处理函数


def file_get_contents(path, mode="r+"):
    """Return the entire contents of the file at ``path``.

    Args:
        path: File to read.
        mode: Mode string passed to ``open``. The default ``"r+"`` opens the
            file for reading and writing, so the file must already exist and
            be writable; pass ``"r"`` or ``"rb"`` for read-only access.

    Returns:
        The file contents: ``str`` for text modes, ``bytes`` for binary modes.
    """
    # The context manager closes the handle even if read() raises.
    with open(path, mode) as handle:
        return handle.read()



# Supported modeName values:
#   'pappend' - put the content in front of the existing text
#   'append'  - add the content after the existing text
#   'wb+'     - overwrite the file with binary content
#   other     - overwrite the file with text content
def file_put_contents(fileName, content, modeName=''):
    """Write ``content`` to ``fileName`` using the strategy named by ``modeName``.

    Args:
        fileName: Path of the file to write.
        content: ``str`` for the text modes, ``bytes`` for ``'wb+'``.
        modeName: One of ``'pappend'``, ``'append'``, ``'wb+'``; any other
            value overwrites the file in text mode.
    """
    if modeName == 'pappend':
        # Opening with "r+" would overwrite the first characters instead of
        # prepending, so read the old text and rewrite the whole file.
        try:
            with open(fileName, "r") as source:
                existing = source.read()
        except FileNotFoundError:
            existing = ""
        with open(fileName, "w") as target:
            target.write(content + existing)
        return

    open_modes = {'append': "a+", 'wb+': "wb+"}
    # The context manager closes the handle even if write() raises.
    with open(fileName, open_modes.get(modeName, "w+")) as target:
        target.write(content)

# file_put_contents(\'rt.txt\',\'da ge\');

批量远程图片下载

multiPool.py

#!/usr/bin/python3
#-*- coding: utf-8 -*-
# author:zhouchao
# 功能:多线程 批量下载远程图片

import random
import time
import os
import math
from multiprocessing.dummy import Pool as ThreadPool
import urllib.request
import requests
import sys
sys.path.append(r"../db")
sys.path.append(r"../function")
from file_get_contents import *
from mysqlDriver import *

# Load the image rows to download: the "?" placeholder is bound to 100, so at
# most 100 rows are fetched. Each row is expected to carry the image URL in
# its first column (downImg reads row[0]).
sql = "select img from images limit ?"
mysqlD = mysqlDriver()
mysqlD.connect()  # no arguments: uses the driver's default connection settings
data = mysqlD.select(sql, [100])

# Directory (with trailing slash) that downloaded images are written to.
saveDir = "D:/images/"


def downImg(url):
    """Download one image and store it under ``saveDir``.

    Args:
        url: A database row whose first element is the image URL
            (the function reads ``url[0]``).

    Returns:
        The path of the saved file, or ``None`` when the request failed or
        the server did not answer with HTTP 200.
    """
    import os
    import uuid

    url = url[0]
    try:
        # A timeout keeps one stalled server from blocking a worker forever.
        ir = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        # One bad URL must not abort the whole batch run by pool.map.
        print("download failed:", url, exc)
        return None
    if ir.status_code != 200:
        return None

    os.makedirs(saveDir, exist_ok=True)
    # time.time() alone is not unique across concurrent threads, so two
    # downloads could overwrite each other; a random suffix prevents that.
    fileName = str(time.time()) + "_" + uuid.uuid4().hex + '.jpg'
    target = saveDir + fileName
    # "wb+" stores the response body as raw bytes.
    file_put_contents(target, ir.content, "wb+")
    return target



startTime = time.time()

# Sequential variant (no thread pool), kept for timing comparison:
# for row in data:
#     downImg(row)


# Threaded variant: download with a pool of 8 worker threads.
pool = ThreadPool(8)
try:
    results = pool.map(downImg, data)
finally:
    # Always shut the workers down, even if a download raised.
    pool.close()
    pool.join()


endTime = time.time()
consumeTime = endTime - startTime
print("程序运行时间:" + str(consumeTime))

注意:

sys.path.append(r"../db")
sys.path.append(r"../function")
请将以上两个路径改为你自己保存这些文件的实际路径。

没有开启和开启多线程所耗时间:


以上是关于多线程批量下载远程图片的主要内容,如果未能解决你的问题,请参考以下文章

python爬虫之多线程threading多进程multiprocessing协程aiohttp 批量下载图片

python爬虫之多线程threading多进程multiprocessing协程aiohttp 批量下载图片

基于SmartThreadPool线程池技术实现多任务批量处理

从CSV文件中读取jpg图片的URL地址并多线程批量下载

aiohttp 异步http请求-3.异步批量下载图片

python 远程批量多线程paramiko 和 threading案例