07-模块：shutil，shelve，xml，hashlib，subprocess，re，ConfigParser

Posted 2020-09-02 汉克书

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了07-模块：shutil，shelve，xml，hashlib，subprocess，re，ConfigParser相关的知识，希望对你有一定的参考价值。

第1章模块

1.1 shutil 模块

  1 import shutil
  2 # 高级的 文件、文件夹、压缩包 处理模块
  3 
  4 # 将文件内容拷贝到另一个文件中，可以部分内容
  5 #
  6 # shutil.copyfileobj(fsrc, fdst[, length])
  7 # f = open("example.log")
  8 # f2 = open("新的","w")
  9 # shutil.copyfileobj(f,f2)
 10 # shutil.copyfile("example.log","新的2")
 11 # shutil.copymode("example.log","新的2")  # copy权限
 12 
 13 # shutil.copystat("example.log","新的2")  # copy状态
 14 
 15 # shutil.copy(\'example.log\', \'新的3\')  # 拷贝文件和权限
 16 # shutil.copy2(\'example.log\', \'新的4\')  # 拷贝文件和状态信息
 17 # shutil.copytree(r"D:\\学习python\\pyn\\培训\\day5\\access.log.1",r"D:\\学习python\\pyn\\培训\\day6",ignore=shutil.ignore_patterns(\'*.pyc\', \'tmp*\'))
 18 
 19 # shutil.rmtree()  # 全删了
 20 
 21 
 22 # shutil.make_archive(base_name, format,...)    打包
 23 
 24 # shutil.make_archive("day5","zip","D:\\学习python\\pyn\\自学\\day5")
 25 
 26 
 27 
 28 import zipfile
 29 
 30 # 压缩
 31 # z = zipfile.ZipFile(\'laxi.zip\', \'w\')
 32 # z.write(\'example.log\')
 33 # z.write(r\'D:\\学习python\\pyn\\培训\\day5\\access.log.1\',arcname="access.log.1")
 34 # z.close()
 35 
 36 # 解压
 37 # z = zipfile.ZipFile(\'laxi.zip\', \'r\')
 38 # z.extractall(members=["指定"],path=r"d:\\\\")
 39 # z.extract("example.log")
 40 # z.close()
 41 
 42 # import tarfile
 43 #
 44 # # 压缩
 45 # tar = tarfile.open(\'your.tar\',\'w\')
 46 # tar.add(\'/Users/wupeiqi/PycharmProjects/bbs2.zip\', arcname=\'bbs2.zip\')
 47 # tar.add(\'/Users/wupeiqi/PycharmProjects/cmdb.zip\', arcname=\'cmdb.zip\')
 48 # tar.close()
 49 #
 50 # # 解压
 51 # tar = tarfile.open(\'your.tar\',\'r\')
 52 # tar.extractall()  # 可设置解压地址
 53 # tar.close()

view code

1.2 shelve模块

shelve模块是一个简单的k,v将内存数据通过文件持久化的模块，可以持久化任何pickle可支持的python数据格式

  1 #!/usr/bin/env python
  2 # _*_ coding:utf-8 _*_
  3 
  4 import shelve
  5 
  6 d = shelve.open(\'shelve_test\') #打开一个文件
  7 
  8 class Test(object):
  9     def __init__(self,n):
 10         self.n = n
 11 
 12 def func():
 13     print("11")
 14 
 15 t = Test(123)
 16 t2 = Test(123334)
 17 
 18 name = ["alex","rain","test"]
 19 d["test"] = name #持久化列表
 20 d["t1"] = t      #持久化类
 21 d["t2"] = t2
 22 d["t3"] = func
 23 d.close()
 24

view code

  1 # 反序列化 导入这个模块 才能导入
  2 
  3 # >>> import shelve
  4 # >>> s = shelve.open("shelve_test")
  5 # >>> for i in s.keys():print(i)
  6 # ...
  7 # test
  8 # t1
  9 # t2
 10 # t3
 11

view code

  1 import shelve
  2 d = shelve.open(\'shelve_test\')
  3 # d.items()  # 读所有
  4 print(d.get("test"))
  5 print(d.get("t1"))
  6 print(d.get("t2"))
  7 d.pop()  # 删的就是硬盘数据
  8 d["test"][0] = "1121"
  9 d.close()

view code

1.3 xml处理模块

  1 <?xml version="1.0"?>
  2 <data>
  3     <country name="Liechtenstein">
  4         <rank updated="yes">2</rank>
  5         <year>2008</year>
  6         <gdppc>141100</gdppc>
  7         <neighbor name="Austria" direction="E"/>
  8         <neighbor name="Switzerland" direction="W"/>
  9     </country>
 10     <country name="Singapore">
 11         <rank updated="yes">5</rank>
 12         <year>2011</year>
 13         <gdppc>59900</gdppc>
 14         <neighbor name="Malaysia" direction="N"/>
 15     </country>
 16     <country name="Panama">
 17         <rank updated="yes">69</rank>
 18         <year>2011</year>
 19         <gdppc>13600</gdppc>
 20         <neighbor name="Costa Rica" direction="W"/>
 21         <neighbor name="Colombia" direction="E"/>
 22     </country>
 23 </data>

view code

  1 # import xml.etree.ElementTree as ET
  2 #
  3 # tree = ET.parse("xmltest.xml")    # 解析
  4 # root = tree.getroot()  # 根节点
  5 # print(root.tag)
  6 
  7 # #遍历xml文档
  8 # for child in root:
  9 #     print(child.tag, child.attrib)
 10 #     for i in child:
 11 #         print(i.tag,i.text,i.attrib)
 12 
 13 #只遍历year 节点
 14 # for node in root.iter(\'year\'):
 15 #     print(node.tag,node.text)
 16 
 17 
 18 
 19 
 20 # 改
 21 # import xml.etree.ElementTree as ET
 22 #
 23 # tree = ET.parse("xmltest.xml")
 24 # root = tree.getroot()
 25 #
 26 # #修改
 27 # for node in root.iter(\'year\'):
 28 #     new_year = int(node.text) + 1
 29 #     node.text = str(new_year)
 30 #     node.set("updated","yes")    # 更改属性
 31 #
 32 # tree.write("xmltest2.xml")
 33 
 34 #
 35 # #删除node
 36 # for country in root.findall(\'country\'):
 37 #    rank = int(country.find(\'rank\').text)
 38 #    if rank > 50:
 39 #      root.remove(country)
 40 #
 41 # tree.write(\'output.xml\',encoding=\'utf8\')
 42 
 43 
 44 # import xml.etree.ElementTree as ET
 45 #
 46 #
 47 # new_xml = ET.Element("namelist")
 48 # name = ET.SubElement(new_xml,"name",attrib={"enrolled":"yes"})
 49 # age = ET.SubElement(name,"age",attrib={"checked":"no"})
 50 # sex = ET.SubElement(name,"sex")
 51 # sex.text = \'33\'
 52 # name2 = ET.SubElement(new_xml,"name",attrib={"enrolled":"no"})
 53 # age = ET.SubElement(name2,"age")
 54 # age.text = \'19\'
 55 #
 56 # et = ET.ElementTree(new_xml) #生成文档对象
 57 # et.write("test.xml", encoding="utf-8",xml_declaration=True)
 58 #
 59 # ET.dump(new_xml) #打印生成的格式

view code

1.4 hashlib模块

  1 import hashlib
  2 
  3 m = hashlib.md5()
  4 m.update(b"Hello")
  5 m.update(b"It\'s me")
  6 print(m.digest())
  7 m.update(b"It\'s been a long time since last time we ...")
  8 
  9 print(m.digest())  # 2进制格式hash
 10 print(len(m.hexdigest()))  # 16进制格式hash
 11 \'\'\'
 12 def digest(self, *args, **kwargs): # real signature unknown
 13     """ Return the digest value as a string of binary data. """
 14     pass
 15 
 16 def hexdigest(self, *args, **kwargs): # real signature unknown
 17     """ Return the digest value as a string of hexadecimal digits. """
 18     pass
 19 
 20 \'\'\'
 21 import hashlib
 22 
 23 # ######## md5 ########
 24 
 25 hash = hashlib.md5()
 26 hash.update(\'admin\')
 27 print(hash.hexdigest())
 28 
 29 # ######## sha1 ########
 30 
 31 hash = hashlib.sha1()
 32 hash.update(\'admin\')
 33 print(hash.hexdigest())
 34 
 35 # ######## sha256 ########
 36 
 37 hash = hashlib.sha256()
 38 hash.update(\'admin\')
 39 print(hash.hexdigest())
 40 
 41 # ######## sha384 ########
 42 
 43 hash = hashlib.sha384()
 44 hash.update(\'admin\')
 45 print(hash.hexdigest())
 46 
 47 # ######## sha512 ########
 48 
 49 hash = hashlib.sha512()
 50 hash.update(\'admin\')
 51 print(hash.hexdigest())
 52

view code

1.5 Subprocess模块

The subprocess module allows you to spawn new processes, connect to their input/output/error pipes, and obtain their return codes. This module intends to replace several older modules and functions:

os.system

os.spawn*

The recommended approach to invoking subprocesses is to use the run() function for all use cases it can handle. For more advanced use cases, the underlying Popen interface can be used directly.

The run() function was added in Python 3.5; if you need to retain compatibility with older versions, see the Older high-level API section.

subprocess.run(args, *, stdin=None, input=None, stdout=None, stderr=None, shell=False, timeout=None, check=False)

Run the command described by args. Wait for command to complete, then return a CompletedProcess instance.

The arguments shown above are merely the most common ones, described below in Frequently Used Arguments (hence the use of keyword-only notation in the abbreviated signature). The full function signature is largely the same as that of the Popen constructor - apart from timeout, input and check, all the arguments to this function are passed through to that interface.

This does not capture stdout or stderr by default. To do so, pass PIPE for the stdout and/or stderr arguments.

The timeout argument is passed to Popen.communicate(). If the timeout expires, the child process will be killed and waited for. The TimeoutExpired exception will be re-raised after the child process has terminated.

The input argument is passed to Popen.communicate() and thus to the subprocess’s stdin. If used it must be a byte sequence, or a string if universal_newlines=True. When used, the internal Popen object is automatically created with stdin=PIPE, and the stdin argument may not be used as well.

If check is True, and the process exits with a non-zero exit code, a CalledProcessError exception will be raised. Attributes of that exception hold the arguments, the exit code, and stdout and stderr if they were captured.

常用subprocess方法示例

  1 
  2 \'\'\'
  3 >>> import subprocess
  4 >>> subprocess.run("df")
  5 文件系统          1K-块    已用     可用 已用% 挂载点
  6 udev             484160       0   484160    0% /dev
  7 tmpfs            100728    9192    91536   10% /run
  8 /dev/sda1      19478204 4900436 13565288   27% /
  9 tmpfs            503636     204   503432    1% /dev/shm
 10 tmpfs              5120       4     5116    1% /run/lock
 11 tmpfs            503636       0   503636    0% /sys/fs/cgroup
 12 tmpfs            100728      52   100676    1% /run/user/1000
 13 CompletedProcess(args=\'df\', returncode=0)
 14 >>> subprocess.run([\'df\',\'-h\'])
 15 文件系统        容量  已用  可用 已用% 挂载点
 16 udev            473M     0  473M    0% /dev
 17 tmpfs            99M  9.0M   90M   10% /run
 18 /dev/sda1        19G  4.7G   13G   27% /
 19 tmpfs           492M  204K  492M    1% /dev/shm
 20 tmpfs           5.0M  4.0K  5.0M    1% /run/lock
 21 tmpfs           492M     0  492M    0% /sys/fs/cgroup
 22 tmpfs            99M   52K   99M    1% /run/user/1000
 23 CompletedProcess(args=[\'df\', \'-h\'], returncode=0)
 24 >>> subprocess.run("df -h | grep /dev/sda1",shell=True)
 25 /dev/sda1        19G  4.7G   13G   27% /
 26 CompletedProcess(args=\'df -h | grep /dev/sda1\', returncode=0)
 27 >>>
 28 
 29 >>> res = subprocess.Popen("df -h | grep /dev/sha1",shell=True)        # Popen 其实就是启用一个进程
 30 >>> res = subprocess.Popen("df -hsdas | grep /dev/sha1",shell=True)
 31 >>> df：无效选项 -- s
 32 Try \'df --help\' for more information.
 33 
 34 >>> res = subprocess.Popen("df -h | grep /dev/sha1",shell=True)
 35 >>>
 36 #错误的话，终端会卡住。
 37 >>> res = subprocess.Popen("df -hsdas | grep /dev/sha1",shell=True,stdout=subprocess.PIPE)
 38 >>> df：无效选项 -- s
 39 Try \'df --help\' for more information.
 40 
 41 >>> res = subprocess.Popen("df -h | grep /dev/sha1",shell=True,stdout=subprocess.PIPE)
 42 >>>
 43 
 44 # 错误正确一起输出
 45 >>> res = subprocess.Popen("df -hsdas | grep /dev/sha1",shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
 46 >>> res
 47 <subprocess.Popen object at 0x7f20265b65f8>
 48 >>> print(res)
 49 
 50 >>> res.stdout.read()
 51 b\'\'
 52 >>> res.stderr.read()
 53 b"df\\xef\\xbc\\x9a\\xe6\\x97\\xa0\\xe6\\x95\\x88\\xe9\\x80\\x89\\xe9\\xa1\\xb9 -- s\\nTry \'df --help\' for more information.\\n"
 54 >>>
 55 
 56 
 57 >>> res = subprocess.Popen("top -bn 3",shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
 58 >>> res.stdout.read()
 59 # 会等执行完top命令后才返回
 60 
 61 >>> res = subprocess.Popen("top -bn 1",shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
 62 >>> res.poll()
 63 0
 64 
 65 >>> res = subprocess.Popen("top -bn 2",shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
 66 >>> res.wait()
 67 0
 68 >>>   # 会一直等到运行结束，然后返回 状态
 69 
 70 res.terminate() 杀掉所启动进程
 71 res.communicate(timeout=2) 等待任务结束
 72 
 73 args：shell命令，可以是字符串或者序列类型（如：list，元组）
 74 bufsize：指定缓冲。0 无缓冲,1 行缓冲,其他 缓冲区大小,负值 系统缓冲
 75 stdin, stdout, stderr：分别表示程序的标准输入、输出、错误句柄
 76 preexec_fn：只在Unix平台下有效，用于指定一个可执行对象（callable object），它将在子进程运行之前被调用
 77 close_fds：在windows平台下，如果close_fds被设置为True，则新创建的子进程将不会继承父进程的输入、输出、错误管道。
 78 所以不能将close_fds设置为True同时重定向子进程的标准输入、输出与错误(stdin, stdout, stderr)。
 79 shell：同上
 80 cwd：用于设置子进程的当前目录
 81 env：用于指定子进程的环境变量。如果env = None，子进程的环境变量将从父进程中继承。
 82 universal_newlines：不同系统的换行符不同，True -> 统一使用 \\n
 83 startupinfo与creationflags只在windows下有效
 84 将被传递给底层的CreateProcess()函数，用于设置子进程的一些属性，如：主窗口的外观，进程的优先级等等
 85 
 86 # 切换目录
 87 >>> res = subprocess.Popen("pwd",cwd="/tmp",shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
 88 >>> res.stdout.read()
 89 b\'/tmp\\n\'
 90 
 91 # 定义环境变量
 92 >>> res = subprocess.Popen("echo $TEST",cwd="/tmp",shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE,env={"TEST":"aige"})
 93 >>> res.stdout.read()
 94 b\'aige\\n\'
 95 
 96 # 先执行函数
 97 >>> res = subprocess.Popen("echo $TEST",cwd="/tmp",shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE,preexec_fn=name)
 98 >>> res.stdout.read()
 99 b\'Aige\\n\\n\'
100 
101 
102 
103 \'\'\'

view code

terminate() 杀掉所启动进程

communicate() 等待任务结束

stdin 标准输入

stdout 标准输出

stderr 标准错误

subprocess.getstatusoutput(\'ls /bin/ls\')

  1 args：shell命令，可以是字符串或者序列类型（如：list，元组）
  2 bufsize：指定缓冲。0 无缓冲,1 行缓冲,其他 缓冲区大小,负值 系统缓冲
  3 stdin, stdout, stderr：分别表示程序的标准输入、输出、错误句柄
  4 preexec_fn：只在Unix平台下有效，用于指定一个可执行对象（callable object），它将在子进程运行之前被调用
  5 close_fds：在windows平台下，如果close_fds被设置为True，则新创建的子进程将不会继承父进程的输入、输出、错误管道。
  6 所以不能将close_fds设置为True同时重定向子进程的标准输入、输出与错误(stdin, stdout, stderr)。
  7 shell：同上
  8 cwd：用于设置子进程的当前目录
  9 env：用于指定子进程的环境变量。如果env = None，子进程的环境变量将从父进程中继承。
 10 universal_newlines：不同系统的换行符不同，True -> 统一使用 \\n
 11 startupinfo与creationflags只在windows下有效
 12

1.6 re模块

常用正则表达式符号

  1 \'.\'     默认匹配除\\n之外的任意一个字符，若指定flag DOTALL,则匹配任意字符，包括换行
  2 \'^\'     匹配字符开头，若指定flags MULTILINE,这种也可以匹配上 re.search(r"^a","\\nabc\\neee",flags=re.MULTILINE)
  3 \'$\'     匹配字符结尾，或re.search("foo$","bfoo\\nsdfsf",flags=re.MULTILINE).group()也可以
  4 \'*\'     匹配*号前的字符0次或多次，re.findall("ab*","cabb3abcbbac")  结果为[\'abb\', \'ab\', \'a\']
  5 \'+\'     匹配前一个字符1次或多次，re.findall("ab+","ab+cd+abb+bba") 结果[\'ab\', \'abb\']
  6 \'?\'     匹配前一个字符1次或0次
  7 \'{m}\'   匹配前一个字符m次
  8 \'{n,m}\' 匹配前一个字符n到m次，re.findall("ab{1,3}","abb abc abbcbbb") 结果[\'abb\', \'ab\', \'abb\']
  9 \'|\'     匹配|左或|右的字符，re.search("abc|ABC","ABCBabcCD").group() 结果\'ABC\'
 10 \'(...)\' 分组匹配，re.search("(abc){2}a(123|456)c", "abcabca456c").group() 结果 abcabca456c
 11 
 12 
 13 \'\\A\'    只从字符开头匹配，re.search("\\Aabc","alexabc") 是匹配不到的
 14 \'\\Z\'    匹配字符结尾，同$
 15 \'\\d\'    匹配数字0-9
 16 \'\\D\'    匹配非数字
 17 \'\\w\'    匹配[A-Za-z0-9_]
 18 \'\\W\'    匹配非[A-Za-z0-9_]
 19 \'\\s\'    匹配空白字符、\\t、\\n、\\r , re.search("\\s+","ab\\tc1\\n3").group() 结果 \'\\t\'
 20 
 21 \'(?P<name>...)\' 分组匹配 re.search("(?P<province>[0-9]{4})(?P<city>[0-9]{2})(?P<birthday>[0-9]{4})","371481199306143242").groupdict("city") 结果{\'province\': \'3714\', \'city\': \'81\', \'birthday\': \'1993\'}
 22

最常用的匹配语法

  1 re.match 从头开始匹配
  2 re.search 匹配包含
  3 re.findall 把所有匹配到的字符以列表元素的形式返回
  4 re.split 以匹配到的字符当做列表分隔符
  5 re.sub      匹配字符并替换
  6

反斜杠的困扰

与大多数编程语言相同，正则表达式里使用"\\"作为转义字符，这就可能造成反斜杠困扰。假如你需要匹配文本中的字符"\\"，那么使用编程语言表示的正则表达式里将需要4个反斜杠"\\\\\\\\"：前两个和后两个分别用于在编程语言里转义成反斜杠，转换成两个反斜杠后再在正则表达式里转义成一个反斜杠。Python里的原生字符串很好地解决了这个问题，这个例子中的正则表达式可以使用r"\\\\"表示。同样，匹配一个数字的"\\\\d"可以写成r"\\d"。有了原生字符串，你再也不用担心是不是漏写了反斜杠，写出来的表达式也更直观。

仅需轻轻知道的几个匹配模式

re.I(re.IGNORECASE): 忽略大小写（括号内是完整写法，下同）

M(MULTILINE): 多行模式，改变\'^\'和\'$\'的行为（参见上图）

S(DOTALL): 点任意匹配模式，改变\'.\'的行为

1.7 ConfigParser模块

  1 # import configparser
  2 # # 生成
  3 # config = configparser.ConfigParser()
  4 # config["DEFAULT"] = {\'ServerAliveInterval\': \'45\',
  5 #                       \'Compression\': \'yes\',
  6 #                      \'CompressionLevel\': \'9\'}
  7 #
  8 # config[\'bitbucket.org\'] = {}
  9 # config[\'bitbucket.org\'][\'User\'] = \'hg\'
 10 # config[\'topsecret.server.com\'] = {}
 11 # topsecret = config[\'topsecret.server.com\']
 12 # topsecret[\'Host Port\'] = \'50022\'     # mutates the parser
 13 # topsecret[\'ForwardX11\'] = \'no\'  # same here
 14 # config[\'DEFAULT\'][\'ForwardX11\'] = \'yes\'
 15 # with open(\'example.ini\', \'w\') as configfile:
 16 #    config.write(configfile)

生成

  1 # 读
  2 # >>> import configparser
  3 # >>> config = configparser.ConfigParser()
  4 # >>> config.sections()
  5 # []
  6 # >>> config.read(\'example.ini\')
  7 # [\'example.ini\']
  8 # >>> config.sections()
  9 # [\'bitbucket.org\', \'topsecret.server.com\']
 10 # >>> \'bitbucket.org\' in config
 11 # True
 12 # >>> \'bytebong.com\' in config
 13 # False
 14 # >>> config[\'bitbucket.org\'][\'User\']
 15 # \'hg\'
 16 # >>> config[\'DEFAULT\'][\'Compression\']
 17 # \'yes\'
 18 # >>> topsecret = config[\'topsecret.server.com\']
 19 # >>> topsecret[\'ForwardX11\']
 20 # \'no\'
 21 # >>> topsecret[\'Port\']
 22 # \'50022\'
 23 # >>> for key in config[\'bitbucket.org\']: print(key)
 24 # ...
 25 # user
 26 # compressionlevel
 27 # serveraliveinterval
 28 # compression
 29 # forwardx11
 30 # >>> config[\'bitbucket.org\'][\'ForwardX11\']
 31 # \'yes\'
 32 
 33 # 改
 34 # [section1]
 35 # k1 = v1
 36 # k2: v2
 37 #
 38 # [section2]
 39 # k1 = v1
 40 #
 41 # import ConfigParser
 42 #
 43 # config = ConfigParser.ConfigParser()
 44 # config.read(\'i.cfg\')
 45 
 46 # ########## 读 ##########
 47 # secs = config.sections()
 48 # print

以上是关于07-模块：shutil，shelve，xml，hashlib，subprocess，re，ConfigParser的主要内容，如果未能解决你的问题，请参考以下文章

07-模块：shutil，shelve，xml，hashlib，subprocess，re，ConfigParser

第1章 模块

1.1 shutil 模块

1.2 shelve模块

1.3 xml处理模块

1.4 hashlib模块

1.5 Subprocess模块

1.6 re模块

1.7 ConfigParser模块

第1章模块