07-模块:shutil,shelve,xml,hashlib,subprocess,re,ConfigParser
Posted 汉克书
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了07-模块:shutil,shelve,xml,hashlib,subprocess,re,ConfigParser相关的知识,希望对你有一定的参考价值。
第1章 模块
1.1 shutil 模块
1 import shutil 2 # 高级的 文件、文件夹、压缩包 处理模块 3 4 # 将文件内容拷贝到另一个文件中,可以部分内容 5 # 6 # shutil.copyfileobj(fsrc, fdst[, length]) 7 # f = open("example.log") 8 # f2 = open("新的","w") 9 # shutil.copyfileobj(f,f2) 10 # shutil.copyfile("example.log","新的2") 11 # shutil.copymode("example.log","新的2") # copy权限 12 13 # shutil.copystat("example.log","新的2") # copy copy状态 14 15 # shutil.copy(\'example.log\', \'新的3\') # 拷贝文件和权限 16 # shutil.copy2(\'example.log\', \'新的4\') # 拷贝文件和状态信息 17 # shutil.copytree(r"D:\\学习python\\pyn\\培训\\day5\\access.log.1",r"D:\\学习python\\pyn\\培训\\day6",ignore=shutil.ignore_patterns(\'*.pyc\', \'tmp*\')) 18 19 # shutil.rmtree() # 全删了 20 21 22 # shutil.make_archive(base_name, format,...) 打包 23 24 # shutil.make_archive("day5","zip","D:\\学习python\\pyn\\自学\\day5") 25 26 27 28 import zipfile 29 30 # 压缩 31 # z = zipfile.ZipFile(\'laxi.zip\', \'w\') 32 # z.write(\'example.log\') 33 # z.write(r\'D:\\学习python\\pyn\\培训\\day5\\access.log.1\',arcname="access.log.1") 34 # z.close() 35 36 # 解压 37 # z = zipfile.ZipFile(\'laxi.zip\', \'r\') 38 # z.extractall(members=["指定"],path=r"d:\\\\") 39 # z.extract("example.log") 40 # z.close() 41 42 # import tarfile 43 # 44 # # 压缩 45 # tar = tarfile.open(\'your.tar\',\'w\') 46 # tar.add(\'/Users/wupeiqi/PycharmProjects/bbs2.zip\', arcname=\'bbs2.zip\') 47 # tar.add(\'/Users/wupeiqi/PycharmProjects/cmdb.zip\', arcname=\'cmdb.zip\') 48 # tar.close() 49 # 50 # # 解压 51 # tar = tarfile.open(\'your.tar\',\'r\') 52 # tar.extractall() # 可设置解压地址 53 # tar.close()
1.2 shelve模块
shelve模块是一个简单的、以key-value形式将内存数据通过文件持久化的模块,可以持久化任何pickle支持的Python数据类型
#!/usr/bin/env python
# _*_ coding:utf-8 _*_
# Demo: persist a list, two class instances and a function with shelve.

import shelve


class Test(object):
    """Minimal picklable value holder used to show that instances can be shelved."""

    def __init__(self, n):
        self.n = n


def func():
    """Print a marker string; stored in the shelf as an example of shelving a function."""
    print("11")


t = Test(123)
t2 = Test(123334)
name = ["alex", "rain", "test"]

# The context manager closes (and flushes) the shelf even if one of the
# assignments below fails while pickling.
with shelve.open('shelve_test') as d:  # opens (creating if needed) the shelf file
    d["test"] = name  # persist a list
    d["t1"] = t       # persist a class instance
    d["t2"] = t2
    # NOTE(review): pickle stores functions and classes by reference, so the
    # reader must be able to import this module to load t1/t2/t3 -- confirm
    # against the pickle documentation.
    d["t3"] = func
# 反序列化时,必须先导入定义了相应类和函数的模块,否则无法还原这些对象

# >>> import shelve
# >>> s = shelve.open("shelve_test")
# >>> for i in s.keys(): print(i)
# ...
# test
# t1
# t2
# t3
# Demo: read back, delete from and update the shelf written to 'shelve_test'.
import shelve

# NOTE(review): loading "t1"/"t2" unpickles class instances, which presumably
# requires the module defining that class to be importable here -- confirm
# against the pickle documentation.
with shelve.open('shelve_test') as d:
    # d.items() would iterate over every stored (key, value) pair.
    print(d.get("test"))
    print(d.get("t1"))
    print(d.get("t2"))

    # pop() needs an explicit key (a bare d.pop() raises TypeError); the entry
    # is removed from the data on disk. The default keeps a missing key from
    # raising KeyError.
    # NOTE(review): the original did not say which key to delete; "t3" is
    # used here as an example.
    d.pop("t3", None)

    # Without writeback=True, d["test"] hands back a fresh unpickled copy on
    # every access, so d["test"][0] = ... would silently be lost. Modify the
    # copy and store it back explicitly.
    if "test" in d:
        names = d["test"]
        names[0] = "1121"
        d["test"] = names
1.3 xml处理模块
1 <?xml version="1.0"?> 2 <data> 3 <country name="Liechtenstein"> 4 <rank updated="yes">2</rank> 5 <year>2008</year> 6 <gdppc>141100</gdppc> 7 <neighbor name="Austria" direction="E"/> 8 <neighbor name="Switzerland" direction="W"/> 9 </country> 10 <country name="Singapore"> 11 <rank updated="yes">5</rank> 12 <year>2011</year> 13 <gdppc>59900</gdppc> 14 <neighbor name="Malaysia" direction="N"/> 15 </country> 16 <country name="Panama"> 17 <rank updated="yes">69</rank> 18 <year>2011</year> 19 <gdppc>13600</gdppc> 20 <neighbor name="Costa Rica" direction="W"/> 21 <neighbor name="Colombia" direction="E"/> 22 </country> 23 </data>
1 # import xml.etree.ElementTree as ET 2 # 3 # tree = ET.parse("xmltest.xml") # 解析 4 # root = tree.getroot() # 根节点 5 # print(root.tag) 6 7 # #遍历xml文档 8 # for child in root: 9 # print(child.tag, child.attrib) 10 # for i in child: 11 # print(i.tag,i.text,i.attrib) 12 13 #只遍历year 节点 14 # for node in root.iter(\'year\'): 15 # print(node.tag,node.text) 16 17 18 19 20 # 改 21 # import xml.etree.ElementTree as ET 22 # 23 # tree = ET.parse("xmltest.xml") 24 # root = tree.getroot() 25 # 26 # #修改 27 # for node in root.iter(\'year\'): 28 # new_year = int(node.text) + 1 29 # node.text = str(new_year) 30 # node.set("updated","yes") # 更改属性 31 # 32 # tree.write("xmltest2.xml") 33 34 # 35 # #删除node 36 # for country in root.findall(\'country\'): 37 # rank = int(country.find(\'rank\').text) 38 # if rank > 50: 39 # root.remove(country) 40 # 41 # tree.write(\'output.xml\',encoding=\'utf8\') 42 43 44 # import xml.etree.ElementTree as ET 45 # 46 # 47 # new_xml = ET.Element("namelist") 48 # name = ET.SubElement(new_xml,"name",attrib={"enrolled":"yes"}) 49 # age = ET.SubElement(name,"age",attrib={"checked":"no"}) 50 # sex = ET.SubElement(name,"sex") 51 # sex.text = \'33\' 52 # name2 = ET.SubElement(new_xml,"name",attrib={"enrolled":"no"}) 53 # age = ET.SubElement(name2,"age") 54 # age.text = \'19\' 55 # 56 # et = ET.ElementTree(new_xml) #生成文档对象 57 # et.write("test.xml", encoding="utf-8",xml_declaration=True) 58 # 59 # ET.dump(new_xml) #打印生成的格式
1.4 hashlib模块
# Demo: incremental hashing with hashlib and the hex digests of b"admin".
import hashlib

# Feeding data in several update() calls hashes the concatenation of all
# chunks supplied so far.
m = hashlib.md5()
m.update(b"Hello")
m.update(b"It's me")
print(m.digest())  # digest of b"HelloIt's me"
m.update(b"It's been a long time since last time we ...")

print(m.digest())          # digest as raw bytes
print(len(m.hexdigest()))  # hexdigest() is the same digest as hex text (32 chars for MD5)

# digest()    -> "Return the digest value as a string of binary data."
# hexdigest() -> "Return the digest value as a string of hexadecimal digits."

# update() only accepts bytes-like input on Python 3, so the text "admin" is
# passed as a bytes literal. The loop variable is not called `hash`, to avoid
# shadowing the builtin.
for algorithm in ("md5", "sha1", "sha256", "sha384", "sha512"):
    hasher = hashlib.new(algorithm)
    hasher.update(b"admin")
    print(hasher.hexdigest())
1.5 Subprocess模块
The subprocess module allows you to spawn new processes, connect to their input/output/error pipes, and obtain their return codes. This module intends to replace several older modules and functions:
os.system
os.spawn*
The recommended approach to invoking subprocesses is to use the run() function for all use cases it can handle. For more advanced use cases, the underlying Popen interface can be used directly.
The run() function was added in Python 3.5; if you need to retain compatibility with older versions, see the Older high-level API section.
subprocess.run(args, *, stdin=None, input=None, stdout=None, stderr=None, shell=False, timeout=None, check=False)
Run the command described by args. Wait for command to complete, then return a CompletedProcess instance.
The arguments shown above are merely the most common ones, described below in Frequently Used Arguments (hence the use of keyword-only notation in the abbreviated signature). The full function signature is largely the same as that of the Popen constructor - apart from timeout, input and check, all the arguments to this function are passed through to that interface.
This does not capture stdout or stderr by default. To do so, pass PIPE for the stdout and/or stderr arguments.
The timeout argument is passed to Popen.communicate(). If the timeout expires, the child process will be killed and waited for. The TimeoutExpired exception will be re-raised after the child process has terminated.
The input argument is passed to Popen.communicate() and thus to the subprocess’s stdin. If used it must be a byte sequence, or a string if universal_newlines=True. When used, the internal Popen object is automatically created with stdin=PIPE, and the stdin argument may not be used as well.
If check is True, and the process exits with a non-zero exit code, a CalledProcessError exception will be raised. Attributes of that exception hold the arguments, the exit code, and stdout and stderr if they were captured.
常用subprocess方法示例
1 2 \'\'\' 3 >>> import subprocess 4 >>> subprocess.run("df") 5 文件系统 1K-块 已用 可用 已用% 挂载点 6 udev 484160 0 484160 0% /dev 7 tmpfs 100728 9192 91536 10% /run 8 /dev/sda1 19478204 4900436 13565288 27% / 9 tmpfs 503636 204 503432 1% /dev/shm 10 tmpfs 5120 4 5116 1% /run/lock 11 tmpfs 503636 0 503636 0% /sys/fs/cgroup 12 tmpfs 100728 52 100676 1% /run/user/1000 13 CompletedProcess(args=\'df\', returncode=0) 14 >>> subprocess.run([\'df\',\'-h\']) 15 文件系统 容量 已用 可用 已用% 挂载点 16 udev 473M 0 473M 0% /dev 17 tmpfs 99M 9.0M 90M 10% /run 18 /dev/sda1 19G 4.7G 13G 27% / 19 tmpfs 492M 204K 492M 1% /dev/shm 20 tmpfs 5.0M 4.0K 5.0M 1% /run/lock 21 tmpfs 492M 0 492M 0% /sys/fs/cgroup 22 tmpfs 99M 52K 99M 1% /run/user/1000 23 CompletedProcess(args=[\'df\', \'-h\'], returncode=0) 24 >>> subprocess.run("df -h | grep /dev/sda1",shell=True) 25 /dev/sda1 19G 4.7G 13G 27% / 26 CompletedProcess(args=\'df -h | grep /dev/sda1\', returncode=0) 27 >>> 28 29 >>> res = subprocess.Popen("df -h | grep /dev/sha1",shell=True) # Popen 其实就是启用一个进程 30 >>> res = subprocess.Popen("df -hsdas | grep /dev/sha1",shell=True) 31 >>> df:无效选项 -- s 32 Try \'df --help\' for more information. 33 34 >>> res = subprocess.Popen("df -h | grep /dev/sha1",shell=True) 35 >>> 36 #错误的话,终端会卡住。 37 >>> res = subprocess.Popen("df -hsdas | grep /dev/sha1",shell=True,stdout=subprocess.PIPE) 38 >>> df:无效选项 -- s 39 Try \'df --help\' for more information. 
40 41 >>> res = subprocess.Popen("df -h | grep /dev/sha1",shell=True,stdout=subprocess.PIPE) 42 >>> 43 44 # 错误正确一起输出 45 >>> res = subprocess.Popen("df -hsdas | grep /dev/sha1",shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE) 46 >>> res 47 <subprocess.Popen object at 0x7f20265b65f8> 48 >>> print(res) 49 50 >>> res.stdout.read() 51 b\'\' 52 >>> res.stderr.read() 53 b"df\\xef\\xbc\\x9a\\xe6\\x97\\xa0\\xe6\\x95\\x88\\xe9\\x80\\x89\\xe9\\xa1\\xb9 -- s\\nTry \'df --help\' for more information.\\n" 54 >>> 55 56 57 >>> res = subprocess.Popen("top -bn 3",shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE) 58 >>> res.stdout.read() 59 # 会等执行万top命令后才返回 60 61 >>> res = subprocess.Popen("top -bn 1",shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE) 62 >>> res.poll() 63 0 64 65 >>> res = subprocess.Popen("top -bn 2",shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE) 66 >>> res.wait() 67 0 68 >>> # 会一直等到运行结束,然后返回 状态 69 70 res.terminate() 杀掉所启动进程 71 res.communicate(timeout=2) 等待任务结束 72 73 args:shell命令,可以是字符串或者序列类型(如:list,元组) 74 bufsize:指定缓冲。0 无缓冲,1 行缓冲,其他 缓冲区大小,负值 系统缓冲 75 stdin, stdout, stderr:分别表示程序的标准输入、输出、错误句柄 76 preexec_fn:只在Unix平台下有效,用于指定一个可执行对象(callable object),它将在子进程运行之前被调用 77 close_sfs:在windows平台下,如果close_fds被设置为True,则新创建的子进程将不会继承父进程的输入、输出、错误管道。 78 所以不能将close_fds设置为True同时重定向子进程的标准输入、输出与错误(stdin, stdout, stderr)。 79 shell:同上 80 cwd:用于设置子进程的当前目录 81 env:用于指定子进程的环境变量。如果env = None,子进程的环境变量将从父进程中继承。 82 universal_newlines:不同系统的换行符不同,True -> 同意使用 \\n 83 startupinfo与createionflags只在windows下有效 84 将被传递给底层的CreateProcess()函数,用于设置子进程的一些属性,如:主窗口的外观,进程的优先级等等 85 86 # 切换目录 87 >>> res = subprocess.Popen("pwd",cwd="/tmp",shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE) 88 >>> res.stdout.read() 89 b\'/tmp\\n\' 90 91 # 定义环境变量 92 >>> res = subprocess.Popen("echo $TEST",cwd="/tmp",shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE,env={"TEST":"aige"}) 93 >>> res.stdout.read() 94 b\'aige\\n\' 95 96 # 先执行函数 97 >>> res = subprocess.Popen("echo 
$TEST",cwd="/tmp",shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE,preexec_fn=name) 98 >>> res.stdout.read() 99 b\'Aige\\n\\n\' 100 101 102 103 \'\'\'
terminate() 杀掉所启动进程
communicate() 等待任务结束
stdin 标准输入
stdout 标准输出
stderr 标准错误
subprocess.getstatusoutput(\'ls /bin/ls\')
args:shell命令,可以是字符串或者序列类型(如:list,元组)
bufsize:指定缓冲。0 无缓冲,1 行缓冲,其他 缓冲区大小,负值 系统缓冲
stdin, stdout, stderr:分别表示程序的标准输入、输出、错误句柄
preexec_fn:只在Unix平台下有效,用于指定一个可执行对象(callable object),它将在子进程运行之前被调用
close_fds:在windows平台下,如果close_fds被设置为True,则新创建的子进程将不会继承父进程的输入、输出、错误管道。
所以不能将close_fds设置为True同时重定向子进程的标准输入、输出与错误(stdin, stdout, stderr)。
shell:同上
cwd:用于设置子进程的当前目录
env:用于指定子进程的环境变量。如果env = None,子进程的环境变量将从父进程中继承。
universal_newlines:不同系统的换行符不同,True -> 统一使用 \n
startupinfo与creationflags只在windows下有效
1.6 re模块
常用正则表达式符号
'.'     默认匹配除\n之外的任意一个字符,若指定flag DOTALL,则匹配任意字符,包括换行
'^'     匹配字符开头,若指定flags MULTILINE,这种也可以匹配上:re.search(r"^a","\nabc\neee",flags=re.MULTILINE)
'$'     匹配字符结尾,或re.search("foo$","bfoo\nsdfsf",flags=re.MULTILINE).group()也可以
'*'     匹配*号前的字符0次或多次,re.findall("ab*","cabb3abcbbac") 结果为['abb', 'ab', 'a']
'+'     匹配前一个字符1次或多次,re.findall("ab+","ab+cd+abb+bba") 结果['ab', 'abb']
'?'     匹配前一个字符1次或0次
'{m}'   匹配前一个字符m次
'{n,m}' 匹配前一个字符n到m次,re.findall("ab{1,3}","abb abc abbcbbb") 结果['abb', 'ab', 'abb']
'|'     匹配|左或|右的字符,re.search("abc|ABC","ABCBabcCD").group() 结果'ABC'
'(...)' 分组匹配,re.search("(abc){2}a(123|456)c", "abcabca456c").group() 结果 abcabca456c


'\A'    只从字符开头匹配,re.search("\Aabc","alexabc") 是匹配不到的
'\Z'    只匹配字符串结尾(与$不同:不受MULTILINE影响,也不会匹配末尾换行符之前的位置)
'\d'    匹配数字0-9
'\D'    匹配非数字
'\w'    匹配[A-Za-z0-9_](注意包含下划线)
'\W'    匹配非[A-Za-z0-9_]
'\s'    匹配空白字符、\t、\n、\r , re.search("\s+","ab\tc1\n3").group() 结果 '\t'

'(?P<name>...)' 分组匹配 re.search("(?P<province>[0-9]{4})(?P<city>[0-9]{2})(?P<birthday>[0-9]{4})","371481199306143242").groupdict() 结果{'province': '3714', 'city': '81', 'birthday': '1993'}
最常用的匹配语法
re.match 从头开始匹配
re.search 匹配包含
re.findall 把所有匹配到的字符以列表的形式返回
re.split 以匹配到的字符当做列表分隔符
re.sub 匹配字符并替换
反斜杠的困扰
与大多数编程语言相同,正则表达式里使用"\"作为转义字符,这就可能造成反斜杠困扰。假如你需要匹配文本中的字符"\",那么使用编程语言表示的正则表达式里将需要4个反斜杠"\\\\":前两个和后两个分别用于在编程语言里转义成反斜杠,转换成两个反斜杠后再在正则表达式里转义成一个反斜杠。Python里的原生字符串很好地解决了这个问题,这个例子中的正则表达式可以使用r"\\"表示。同样,匹配一个数字的"\\d"可以写成r"\d"。有了原生字符串,你再也不用担心是不是漏写了反斜杠,写出来的表达式也更直观。
仅需轻轻知道的几个匹配模式
re.I(re.IGNORECASE): 忽略大小写(括号内是完整写法,下同)
re.M(re.MULTILINE): 多行模式,改变'^'和'$'的行为(参见上文"常用正则表达式符号")
re.S(re.DOTALL): 点任意匹配模式,改变'.'的行为
1.7 ConfigParser模块
1 # import configparser 2 # # 生成 3 # config = configparser.ConfigParser() 4 # config["DEFAULT"] = {\'ServerAliveInterval\': \'45\', 5 # \'Compression\': \'yes\', 6 # \'CompressionLevel\': \'9\'} 7 # 8 # config[\'bitbucket.org\'] = {} 9 # config[\'bitbucket.org\'][\'User\'] = \'hg\' 10 # config[\'topsecret.server.com\'] = {} 11 # topsecret = config[\'topsecret.server.com\'] 12 # topsecret[\'Host Port\'] = \'50022\' # mutates the parser 13 # topsecret[\'ForwardX11\'] = \'no\' # same here 14 # config[\'DEFAULT\'][\'ForwardX11\'] = \'yes\' 15 # with open(\'example.ini\', \'w\') as configfile: 16 # config.write(configfile)
1 # 读 2 # >>> import configparser 3 # >>> config = configparser.ConfigParser() 4 # >>> config.sections() 5 # [] 6 # >>> config.read(\'example.ini\') 7 # [\'example.ini\'] 8 # >>> config.sections() 9 # [\'bitbucket.org\', \'topsecret.server.com\'] 10 # >>> \'bitbucket.org\' in config 11 # True 12 # >>> \'bytebong.com\' in config 13 # False 14 # >>> config[\'bitbucket.org\'][\'User\'] 15 # \'hg\' 16 # >>> config[\'DEFAULT\'][\'Compression\'] 17 # \'yes\' 18 # >>> topsecret = config[\'topsecret.server.com\'] 19 # >>> topsecret[\'ForwardX11\'] 20 # \'no\' 21 # >>> topsecret[\'Port\'] 22 # \'50022\' 23 # >>> for key in config[\'bitbucket.org\']: print(key) 24 # ... 25 # user 26 # compressionlevel 27 # serveraliveinterval 28 # compression 29 # forwardx11 30 # >>> config[\'bitbucket.org\'][\'ForwardX11\'] 31 # \'yes\' 32 33 # 改 34 # [section1] 35 # k1 = v1 36 # k2: v2 37 # 38 # [section2] 39 # k1 = v1 40 # 41 # import ConfigParser 42 # 43 # config = ConfigParser.ConfigParser() 44 # config.read(\'i.cfg\') 45 46 # ########## 读 ########## 47 # secs = config.sections() 48 # print以上是关于07-模块:shutil,shelve,xml,hashlib,subprocess,re,ConfigParser的主要内容,如果未能解决你的问题,请参考以下文章
Python函数和常用模块day06:shutil-shelve-xml-configparser模块
python 之 random 模块 shutil 模块shelve模块 xml模块
python学习道路(day6note)(time &datetime,random,shutil,shelve,xml处理,configparser,hashlib,logging模块,re