Python之基于十六进制判断文件类型
Posted 小粉优化大师
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Python之基于十六进制判断文件类型相关的知识,希望对你有一定的参考价值。
核心代码:
#!/usr/bin/env python # -*- coding: utf-8 -*- # @Author : suk import struct from io import BytesIO # 支持文件类型 # 用16进制字符串的目的是可以知道文件头是多少字节 # 各种文件头的长度不一样,少则2字符,长则8字符 def typeList(types): type_dict = {\'jpg\': [\'FFD8FFE000104A464946\'], \'png\': [\'89504E470D0A1A0A0000\'], \'gif\': [\'47494638396126026F01\'], \'tif\': [\'49492A00227105008037\'], \'bmp\': [\'424D8E1B030000000000\'], \'dwg\': [\'41433130313500000000\'], \'html\': [\'3C21444F435459504520\'], \'htm\': [\'3C21646F637479706520\'], \'css\': [\'48544D4C207B0D0A0942\'], \'js\': [\'696B2E71623D696B2E71\'], \'rtf\': [\'7B5C727466315C616E73\'], \'psd\': [\'38425053000100000000\'], \'eml\': [\'46726F6D3A203D3F6762\'], \'wps\': [\'D0CF11E0A1B11AE10000\'], \'mdb\': [\'5374616E64617264204A\'], \'ps\': \'[252150532D41646F6265]\', \'pdf\': [\'255044462D312E\'], \'rmvb\': [\'2E524D46000000120001\'], \'flv\': [\'464C5601050000000900\'], \'mp4\': [\'00000020667479706D70\'], \'mp3\': [\'49443303000000002176\'], \'mpg\': [\'000001BA210001000180\'], \'wmv\': [\'3026B2758E66CF11A6D9\'], \'wav\': [\'52494646E27807005741\'], \'avi\': [\'52494646D07D60074156\'], \'mid\': [\'4D546864000000060001\'], \'zip\': [\'504B0304140000000800\', \'504B0304140000080800\', \'504B03040A0000080000\'], \'rar\': [\'526172211A0700CF9073\'], \'ini\': [\'235468697320636F6E66\'], \'jar\': [\'504B03040A0000000000\'], \'exe\': [\'4D5A9000030000000400\'], \'jsp\': [\'3C25402070616765206C\'], \'mf\': [\'4D616E69666573742D56\'], \'xml\': [\'3C3F786D6C2076657273\'], \'sql\': [\'494E5345525420494E54\'], \'java\': [\'7061636B616765207765\'], \'bat\': [\'406563686F206F66660D\'], \'gz\': [\'1F8B0800000000000000\'], \'properties\': [\'6C6F67346A2E726F6F74\'], \'class\': [\'CAFEBABE0000002E0041\'], \'chm\': [\'49545346030000006000\'], \'mxp\': [\'04000000010000001300\'], \'docx\': [\'504B0304140006000800\', \'504B03040A0000000000\'], \'torrent\': [\'6431303A637265617465\'], \'mov\': [\'6D6F6F76\'], \'wpd\': [\'FF575043\'], \'dbx\': [\'CFAD12FEC5FD746F\'], \'pst\': [\'2142444E\'], \'qdf\': [\'AC9EBD8F\'], \'pwl\': [\'E3828596\'], \'ram\': [\'2E7261FD\'] } ret = {} for k_hex, v_prefix in type_dict.items(): if k_hex in types: ret[k_hex] = v_prefix return ret # 字节码转16进制字符串 def bytes2hex(bytes): num = len(bytes) hexstr = u"" for i in range(num): t = u"%x" % bytes[i] if len(t) % 2: hexstr += u"0" hexstr += t return hexstr.upper() # 获取文件类型 def file_type(filename): binfile = open(filename, \'rb\') # 必需二制字读取 tl = typeList(types=["jpg", "zip", "docx"]) ftype = None for type_name, hcode_list in tl.items(): flag = False for hcode in hcode_list: numOfBytes = int(len(hcode) / 2) # 需要读多少字节 binfile.seek(0) # 每次读取都要回到文件头,不然会一直往后读取 hbytes = struct.unpack_from("B" * numOfBytes, binfile.read(numOfBytes)) # 一个 "B"表示一个字节 f_hcode = bytes2hex(hbytes) # 如果判断不出来,打印出这个值,往字典增加即可 # print("上传数据流hex", s_hcode, \'=\', "代码字典hex", hcode) # 如果判断不出来,打印出这个值,往字典增加即可 if f_hcode == hcode: flag = True break if flag: ftype = type_name break binfile.close() return ftype # 获取字节流类型 def stream_type(stream, types): """ :param stream:流数据 :param types:需要判断文件类型,格式:["jpg","jpn"] :return: """ tl = typeList(types=types) ftype = None for type_name, hcode_list in tl.items(): flag = False for hcode in hcode_list: numOfBytes = int(len(hcode) / 2) # 需要读多少字节 hbytes = struct.unpack_from("B" * numOfBytes, stream[0:numOfBytes]) # 一个 "B"表示一个字节 s_hcode = bytes2hex(hbytes) # print("上传数据流hex", s_hcode, \'=\', "代码字典hex", hcode) # 如果判断不出来,打印出这个值,往字典增加即可 if s_hcode == hcode: flag = True break if flag: ftype = type_name break return ftype def stream_split(stream, count=3): """ 主要处理流是分段获取的数据 :param stream: 块流 :param count: 取多少段合成来判断类型,默认三段 :return: """ block_stream = BytesIO() temp = 1 for block in stream: block_stream.write(block) if temp == count: break temp += 1 return block_stream.getvalue()
type_dict字典,根据自己上传的文件,来填写,数据来自互联网。
基于Flask的上传示例
@index.route(\'/upload\', methods=[\'GET\', \'POST\']) def upload(): if request.method == \'GET\': return render_template(\'upload.html\') upload_obj = request.files.get(\'code_file\') if not upload_obj: return \'没有选择文件上传\' ret = stream_type(stream_split(upload_obj.stream), ["jpg", "png", "pdf"]) if not ret: return \'上传失败,文件类型不匹配,类型必须 "jpg" or "png" or "pdf"\' file_name = upload_obj.filename upload_obj.save(os.path.join(\'files\', file_name)) return \'上传文件成功\'
upload.html
{% extends \'layout.html\' %} {% block content %} <h1>上传代码</h1> <form action="" method="post" enctype="multipart/form-data"> <input type="file" name="code_file"> <input type="submit" value="上传"></input> </form> {% endblock %}
开始上传文件:
上传不在列表中的文件类型
上传在列表中的文件类型
以上是关于Python之基于十六进制判断文件类型的主要内容,如果未能解决你的问题,请参考以下文章