import argparse
import codecs
import re
"""
Reference: 'U+DEADBEEF: Why you shouldn't trust arbitrary text encodings'
<http://rspeer.github.io/blog/2014/03/30/unicode-deadbeef/>
"""
# Whitelist of codec names this application accepts.  The pattern is meant to
# be applied with ``.match()`` (anchored at the start only) to the normalized
# name reported by ``codecs.lookup(...).name``.
ENCODING_WHITELIST = re.compile(
    r'ascii|'
    r'utf-(8|16)|'
    r'cp(437|125[0-8])|'
    # Python normalizes the ISO 8859 family to 'iso8859-N' (no hyphen after
    # 'iso'), so the hyphen has to be optional or this branch never matches.
    # The two-digit alternative is tried first and the branch is end-anchored
    # so that only parts 1-15 are accepted (e.g. 'iso8859-16' is rejected).
    r'iso-?8859-(1[0-5]|[1-9])$|'
    r'mac-roman')
def is_supported_encoding(encoding):
    """
    Check an encoding name against the application's whitelist.

    The name is first resolved through ``codecs.lookup`` so that aliases are
    reduced to Python's normalized codec name, and that normalized name is
    then matched against ``ENCODING_WHITELIST``.

    :param encoding: encoding name or alias to check
    :returns: normalized encoding name or None if not found
    :raises: LookupError (Python unable to locate encoding name)
    """
    normalized = codecs.lookup(encoding).name
    if not ENCODING_WHITELIST.match(normalized):
        return None
    return normalized
if __name__ == '__main__':
    # Command-line smoke test: report whether the encoding given with
    # -e/--encoding (default 'utf-8') is accepted by is_supported_encoding.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '-e', '--encoding',
        default='utf-8',
        help=("check to see if encoding is supported "
              "and return normalized encoding name"))
    options = cli.parse_args()
    try:
        normalized = is_supported_encoding(options.encoding)
        if normalized:
            message = "Supported encoding: '{}'".format(normalized)
        else:
            message = ("Specified encoding is not supported "
                       "by this application.")
        print(message)
    except LookupError:
        # codecs.lookup could not resolve the name at all.
        print("LookupError: Specified encoding is not supported by Python.")