python Python.DataTypes.Dictionaries
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了python Python.DataTypes.Dictionaries相关的知识,希望对你有一定的参考价值。
def sort_dict_lists_adv(dict_in, index_in, mode="key", reverse=False):
    '''
    Author: p.doulgeridis
    Name: sort_dict_lists_adv
    Function: sort_dict_lists_adv(dict_in, index_in, mode="key", reverse=False)
    Description: Sorts the items of a dictionary whose values are indexable
                 sequences (e.g. lists).
    Input:  * <Dictionary>dict_in
            * <Integer>index_in : position inside each value, used only
                                  when mode="value"
            * mode    - [ "key" | "value" ]
                        "key"   -> order by the dictionary key
                        "value" -> order by value[index_in]
            * reverse - [ True | False ] : a truthy value sorts descending
    Output: sorted list of (key, value) tuples, or None when mode is
            neither "key" nor "value"
    Usage: print(sort_dict_lists_adv(xs3, 2, mode="value", reverse=False))
    Notes: The function returns a list of (key, value) tuples. To turn it
           back into a mapping use dict() (keeps the order on Python 3.7+)
           or 'OrderedDict' from the 'collections' module on older versions.
    '''
    if mode == "key":
        # Order by the key itself (element 0 of each item), matching
        # sort_dict_simple; the value lists play no part here.
        def sort_key(item):
            return item[0]
    elif mode == "value":
        # Order by one chosen element of each value sequence.
        def sort_key(item):
            return item[1][index_in]
    else:
        # Unknown mode: nothing to sort by.
        return None
    return sorted(dict_in.items(), key=sort_key, reverse=bool(reverse))
import operator
# Sample data: one dict of plain integers and three dicts whose values are lists
xs = {'a': 4, 'b': 3, 'c': 2, 'd': 1}
xs2 = {'a': [ 4, 1 ], 'b': [ 3, 2 ], 'c': [ 2, 3 ], 'd': [ 1, 5] }
xs3 = {'a': [ 4, 1, 2 ], 'b': [ 3, 2, 1 ], 'c': [ 2, 3, 4 ], 'd': [ 1, 5, 3] }
xs4 = {'a': [ 4, 1, 2, 1 ], 'b': [ 3, 2, 4, 6 ], 'c': [ 2, 3, 3, 4 ], 'd': [ 1, 5, 6, 7] }
# sort dict by value, ascending (x[1] is the value of each (key, value) pair)
print(dict(sorted(xs.items(), key=lambda x: x[1])))
# sort dict by key, ascending (x[0] is the key)
print(dict(sorted(xs.items(), key=lambda x: x[0])))
# sort dict by value, descending
print(dict(sorted(xs.items(), key=lambda x: x[1], reverse=True)))
# sort dict by value, second way: operator.itemgetter(1) instead of a lambda
print(sorted(xs.items(), key=operator.itemgetter(1)))
# sort dict by value, second way, descending
print(sorted(xs.items(), key=operator.itemgetter(1), reverse=True))
# sort dict of lists by element 1 of each list, descending
print(sorted(xs2.items(), key=lambda x: x[1][1], reverse=True))
#print(sorted(xs2.items(), key=operator.itemgetter(2), reverse=True))
# ##################
# sort dict of lists by element 2 of each list, descending
print(dict(sorted(xs3.items(), key=lambda x: x[1][2], reverse=True)))
# sort dict of lists by element 3 of each list, descending
print(sorted(xs4.items(), key=lambda x: x[1][3], reverse=True))
# sort dict of lists by element 3 of each list, ascending
print(sorted(xs4.items(), key=lambda x: x[1][3], reverse=False))
def sort_dict_lists_adv(dict_in, index_in, mode="key", reverse=False):
    '''
    Author: p.doulgeridis
    Name: sort_dict_lists_adv
    Function: sort_dict_lists_adv(dict_in, index_in, mode="key", reverse=False)
    Description: Sorts the items of a dictionary whose values are indexable
                 sequences (e.g. lists).
    Input:  * <Dictionary>dict_in
            * <Integer>index_in : position inside each value, used only
                                  when mode="value"
            * mode    - [ "key" | "value" ]
                        "key"   -> order by the dictionary key
                        "value" -> order by value[index_in]
            * reverse - [ True | False ] : a truthy value sorts descending
    Output: sorted list of (key, value) tuples, or None when mode is
            neither "key" nor "value"
    Usage: print(sort_dict_lists_adv(xs3, 2, mode="value", reverse=False))
    Notes: The function returns a list of (key, value) tuples. To turn it
           back into a mapping use dict() (keeps the order on Python 3.7+)
           or 'OrderedDict' from the 'collections' module on older versions.
    '''
    if mode == "key":
        # Order by the key itself (element 0 of each item), matching
        # sort_dict_simple; the value lists play no part here.
        def sort_key(item):
            return item[0]
    elif mode == "value":
        # Order by one chosen element of each value sequence.
        def sort_key(item):
            return item[1][index_in]
    else:
        # Unknown mode: nothing to sort by.
        return None
    return sorted(dict_in.items(), key=sort_key, reverse=bool(reverse))
def sort_dict_simple(dict_in, mode="key", reverse=False):
    '''
    Author: p.doulgeridis
    Name: sort_dict_simple
    Function: sort_dict_simple(dict_in, mode="key", reverse=False)
    Description: Orders the (key, value) pairs of a dictionary by key or
                 by value.
    Input:  * <Dictionary>dict_in
            * mode    - [ "key" | "value" ] : which half of each pair to sort on
            * reverse - [ True | False ]    : True sorts descending
    Output: sorted list of (key, value) tuples; None when mode or reverse
            is not one of the accepted values
    Usage: print(sort_dict_simple(xs, mode="value", reverse=False))
    Notes: The result is a list of tuples, not a dictionary. Feed it to
           dict() or collections.OrderedDict to get a mapping back.
    '''
    # Translate the flag into a real boolean; anything that equals neither
    # False nor True is rejected.
    if reverse == False:
        descending = False
    elif reverse == True:
        descending = True
    else:
        return None

    # Position inside each (key, value) pair that drives the ordering.
    if mode == "key":
        position = 0
    elif mode == "value":
        position = 1
    else:
        return None

    pairs = dict_in.items()
    return sorted(pairs, key=lambda pair: pair[position], reverse=descending)
# Prints a row of dashes to the console as a visual separator
print("------------------")
def sort_dict_simple_v2(dict_in, mode="key", reverse=False):
    '''
    Author: p.doulgeridis
    Name: sort_dict_simple_v2
    Function: sort_dict_simple_v2(dict_in, mode="key", reverse=False)
    Description: Same as sort_dict_simple, but built on operator.itemgetter
                 and reporting unsortable input with a message instead of
                 raising.
    Input:  * <Dictionary>dict_in
            * mode    - [ "key" | "value" ] : which half of each pair to sort on
            * reverse - [ True | False ]    : a truthy value sorts descending
    Output: * sorted list of (key, value) tuples, or
            * the string "Could not sort dictionary" when dict_in has no
              items() or its keys/values cannot be compared, or
            * None when mode is neither "key" nor "value"
    Usage: print(sort_dict_simple_v2(xs, mode="value", reverse=True))
    Notes: The result is a list of tuples. Use dict() (ordered on
           Python 3.7+) or collections.OrderedDict to get a mapping back.
           Requires the 'operator' module to be imported at module level.
    '''
    if mode == "key":
        position = 0
    elif mode == "value":
        position = 1
    else:
        return None

    # Both sort directions are guarded the same way. Only the failures that
    # sorting can realistically produce are caught, so unrelated errors
    # (e.g. KeyboardInterrupt) are no longer swallowed.
    try:
        return sorted(dict_in.items(),
                      key=operator.itemgetter(position),
                      reverse=bool(reverse))
    except (AttributeError, TypeError):
        return "Could not sort dictionary"
# Demo calls: exercise the helper functions on the sample dictionaries defined earlier
print(sort_dict_simple(xs, mode="key", reverse=True))
print(sort_dict_simple(xs, mode="key", reverse=False))
print(sort_dict_simple_v2(xs, mode="value", reverse=True))
print(sort_dict_simple(xs, mode="value", reverse=False))
# Sort the dict of lists by element 2 of each list, ascending
print(sort_dict_lists_adv(xs3, 2, mode="value", reverse=False))
def twoliststozip(list1, list2):
    '''
    Author: p.doulgeridis
    Name: twoliststozip
    Function: twoliststozip(list1, list2)
    Description: Pairs the elements of two lists positionally with zip()
                 and builds a dictionary from the pairs.
    Input:  * <list>list1 : supplies the keys
            * <list>list2 : supplies the values
    Output: Dictionary mapping list1[i] -> list2[i], or an error message
            string when the two lists differ in length
    Usage: b = twoliststozip(list1, list2)
    '''
    same_size = len(list1) == len(list2)
    if same_size:
        return dict(zip(list1, list2))
    return "Error: Input lists have different lengths"
def twoliststozipv2(list1, list2):
    '''
    Author: p.doulgeridis
    Name: twoliststozipv2
    Function: twoliststozipv2(list1, list2)
    Description: Same mapping as twoliststozip, built with a dictionary
                 comprehension over zip().
    Input:  * <list>list1 : supplies the keys
            * <list>list2 : supplies the values
    Output: Dictionary mapping list1[i] -> list2[i], or an error message
            string when the two lists differ in length
    Usage: b = twoliststozipv2(list1, list2)
    '''
    if len(list1) == len(list2):
        return {label: content for label, content in zip(list1, list2)}
    return "Error: Input lists have different lengths"
def twolisttozip_py2(list1, list2):
    '''
    Author: p.doulgeridis
    Name: twolisttozip_py2
    Function: twolisttozip_py2(list1, list2)
    Description: Same mapping as twoliststozip, using itertools.izip when
                 it can be imported and the built-in zip otherwise.
    Input:  * <list>list1 : supplies the keys
            * <list>list2 : supplies the values
    Output: Dictionary mapping list1[i] -> list2[i], or an error message
            string when the two lists differ in length
    Usage: b = twolisttozip_py2(list1, list2)
    '''
    if len(list1) != len(list2):
        return "Error: Input lists have different lengths"

    # izip only exists where itertools still provides it; fall back to zip.
    try:
        from itertools import izip as pair_up
    except ImportError:
        pair_up = zip
    return dict(pair_up(list1, list2))
def twoliststozip_dictcomp(list1, list2):
    '''
    Author: p.doulgeridis
    Name: twoliststozip_dictcomp
    Function: twoliststozip_dictcomp(list1, list2)
    Description: Same mapping as twoliststozip, built by position lookup
                 inside a dictionary comprehension (no zip involved).
    Input:  * <list>list1 : supplies the keys
            * <list>list2 : supplies the values
    Output: Dictionary mapping list1[i] -> list2[i], or an error message
            string when the two lists differ in length
    Usage: b = twoliststozip_dictcomp(list1, list2)
    '''
    if len(list1) == len(list2):
        # Walk list1 with its positions and pick the partner from list2.
        return {label: list2[pos] for pos, label in enumerate(list1)}
    return "Error: Input lists have different lengths"
# Demo input: the lists deliberately differ in length (4 vs 3 elements), so each
# call below prints the "different lengths" error message rather than a dictionary
a = [ "a", "b", "c", "d" ]
b = [ 1, 2, 3 ]
print(twoliststozip(a, b))
print(twoliststozipv2(a, b))
print(twolisttozip_py2(a, b))
print(twoliststozip_dictcomp(a, b))
Functions on dictionaries:
1. pretty_print : Prints a nice, readable version of the dictionary
2. filter_dict_keys : Filters out keys from a dictionary, based on list provided
3. key_max_value : Return dictionary key that contains max value
4. remove_key : Remove a key from a dictionary
5. count_smaller : Count how many keys have value smaller than limit
6. count_larger : Count how many keys have value bigger than limit
7. filter_smaller : Filter keys with values smaller than limit
8. filter_larger : Filter keys with values larger than limit
9. filter_array_dict : Filter keys with values in array
10. dict_frequency : Counts frequency of keys per value in dict
11. reverse_dict : Swaps values and keys of dictionary
12. dict_index_search : Searches for indexes, stores in dictionary
13. MOD_defaultdict.py : Tutorial to default dict module
14. Key_Counter.py : Counts frequencies and outputs to different files
15. MOD_collections_Counter.py : (Includes default dict) - Optimised collections
16. MOD_collections_DefaultDict.py : Optimised default dictionary structures
17. MOD_collections_OrderedDict.py : Optimised ordered dictionary structures
18. MOD_collections_DeQueue.py : Optimised deque (double-ended queue) structures
19. MOD_collections_NamedTuple.py : Optimised named tuple structures
20. loadfiletodict.py : Load text file to dictionary methods
21. TwoListsToDict.py : Maps two lists into a dictionary
22. DictSorting.py : Sorting dictionaries by key or value
23. DictSortingXD.py : Sorting dictionaries of LISTS by key or value
import os
import sys
# Path of the input text file, taken from the first command-line argument
file_in = sys.argv[1]
def loadfiletodict(file_in, start, length, strip="yes"):
    '''
    Name: loadfiletodict
    Function: loadfiletodict(file_in, start, length, strip="yes")
    Description: Loads a text file to a dictionary, based on a fixed-position
                 substring of every line. Those substrings are used as keys.
                 Each key holds a list of [ <line1>, <line2> .. ]
                 *line1, line2 share the same key.
    Input: 1. <file_in> : Input text file
           2. start     : 1-based column where the key substring starts
           3. length    : number of characters in the key substring
           4. [ strip="yes" | strip="no" ] : "yes" removes trailing
              whitespace (including the newline) from every line before the
              key is extracted and the line is stored
    Output: (<dictionary>, <int>)
            1. <dictionary> : produced defaultdict(list), with substrings as keys.
            2. <int> : number of unique keys (length of the dictionary)
            When file_in is not an existing file the tuple
            ("ERROR: Reading dictionary.", 0) is returned instead.
    Usage: my_in = loadfiletodict(file_in, 26, 3)  (default)
           my_in = loadfiletodict(file_in, 26, 3, strip="no")  (non stripping)
    Required: collections, os (both part of the standard library)
    Notes: The returned mapping is a <defaultdict>, which is different from
           <dict>. If a plain dict is needed, cast it:
               dict_out = dict(default_dict)
    '''
    import collections
    import os

    # check if input file exists
    if not os.path.isfile(file_in):
        return ("ERROR: Reading dictionary.", 0)

    # The slice bounds are the same for every line, so compute them once.
    # 'start' is 1-based, Python slices are 0-based.
    start_py = int(start) - 1
    end_py = start_py + int(length)

    dict_out = collections.defaultdict(list)
    with open(file_in, 'r') as f:
        for line in f:
            # Handle new line strip
            if strip == "yes":
                line = line.rstrip()
            # Group the line under its key substring
            dict_out[line[start_py:end_py]].append(line)
    return (dict_out, len(dict_out))
def loadfiletodictcounter(file_in, start, length, strip="yes"):
    '''
    Name: loadfiletodictcounter
    Function: loadfiletodictcounter(file_in, start, length, strip="yes")
    Description: Loads a text file to a dictionary, based on a fixed-position
                 substring of every line. Those substrings are used as keys.
                 Each key holds a list of [ counter, <line1>, <line2> .. ]
                 where counter is the number of lines sharing that key.
    Input: 1. <file_in> : Input text file
           2. start     : 1-based column where the key substring starts
           3. length    : number of characters in the key substring
           4. [ strip="yes" | strip="no" ] : "yes" removes trailing
              whitespace (including the newline) from every line before the
              key is extracted and the line is stored
    Output: (<dictionary>, <int>)
            1. <dictionary> : produced defaultdict(list), with substrings as keys.
            2. <int> : number of unique keys (length of the dictionary)
            When file_in is not an existing file the tuple
            ("ERROR: Reading dictionary.", 0) is returned instead.
    Usage: my_in = loadfiletodictcounter(file_in, 26, 3)  (default)
           my_in = loadfiletodictcounter(file_in, 26, 3, strip="no")  (non stripping)
    Required: collections, os (both part of the standard library)
    Notes: The returned mapping is a <defaultdict>, which is different from
           <dict>. If a plain dict is needed, cast it:
               dict_out = dict(default_dict)
           * Differs from loadfiletodict -> element 0 of each list is the
             per-key line counter.
    '''
    import collections
    import os

    # check if input file exists
    if not os.path.isfile(file_in):
        return ("ERROR: Reading dictionary.", 0)

    # The slice bounds are the same for every line, so compute them once.
    # 'start' is 1-based, Python slices are 0-based.
    start_py = int(start) - 1
    end_py = start_py + int(length)

    dict_out = collections.defaultdict(list)
    with open(file_in, 'r') as f:
        for line in f:
            # Handle new line strip
            if strip == "yes":
                line = line.rstrip()
            entry = dict_out[line[start_py:end_py]]
            if not entry:
                # First line for this key: slot 0 is reserved for the counter
                entry.append(0)
            entry[0] += 1
            entry.append(line)
    return (dict_out, len(dict_out))
def dicttofilepy(dict_in, file_ot):
    '''
    Name: dicttofilepy
    Description: Outputs a file that contains the
                 definition of <dict_in> ready to be
                 imported (See Notes and sample code)
    Input: <dict_in> : mapping (or iterable of key/value pairs) to export
           <file_ot> : output filename, must end in .py
    Output: True when the file was written; False when file_ot does not
            end in .py or the file could not be written
    Usage: dicttofilepy(project_info, "OUTDATA.py")
    Notes:
        dicttofilepy(project_info, "OUTDATA.py")
        import OUTDATA
        data = OUTDATA.allData
    Notes2: Check also in 'Python Useful Tricks'
    '''
    import pprint

    # Work on a plain dict built from the parameter (pprint output for a
    # defaultdict would not be importable as-is).
    data = dict(dict_in)

    # check if file_ot ends in .py
    if not file_ot.lower().endswith('.py'):
        return False

    try:
        # 'with' closes the file even when the write fails
        with open(file_ot, 'w') as result_file:
            result_file.write('allData = ' + pprint.pformat(data))
    except (IOError, OSError):
        print("Problem loading data to: " + str(file_ot))
        return False
    return True
def py_format(input, file_ot):
    '''
    Name: py_format
    Description: Outputs a file that contains the
                 definition of <input> ready to be
                 imported (See Notes and sample code)
    Input: <input>   : mapping (or iterable of key/value pairs) to export
           <file_ot> : output filename, must end in .py
    Output: True when the file was written; False when file_ot does not
            end in .py or the file could not be written
    Usage: py_format(project_info, "OUTDATA.py")
    Notes:
        py_format(project_info, "OUTDATA.py")
        import OUTDATA
        data = OUTDATA.allData
    Notes2: Check also in 'Python Useful Tricks'
    '''
    import pprint

    # Work on a plain dict (pprint output for a defaultdict would not be
    # importable as-is). The parameter name shadows the built-in input();
    # it is kept for callers that pass it by keyword.
    data = dict(input)

    # check if file_ot ends in .py
    if not file_ot.lower().endswith('.py'):
        return False

    try:
        # 'with' closes the file even when the write fails
        with open(file_ot, 'w') as result_file:
            result_file.write('allData = ' + pprint.pformat(data))
    except (IOError, OSError):
        print("Problem loading data to: " + str(file_ot))
        return False
    return True
def pretty_print(input, indent_depth):
    '''
    Name: pretty_print
    Description: pretty_prints a data structure
    Input: <List/Tuple/Dict> input : structure to display
           indent_depth : number of spaces added per nesting level when
                          the structure is too wide for a single line
    Output: stdout (returns None)
    Usage: pretty_print(dict_in, 4)
    Notes: Works on any kind of data structure. If formatting fails
           (for example because indent_depth is negative or not a number)
           the message "Pretty print failed" is printed instead.
    Requires: pprint module
    '''
    import pprint
    try:
        # Honour the requested indentation instead of ignoring the argument
        pprint.pprint(input, indent=indent_depth)
    except Exception:
        # Best-effort display helper: report the failure, never raise
        print("Pretty print failed")
# Group the input file's lines by the 3-character key starting at column 26 and show the result
my_in = loadfiletodict(file_in, 26, 3)
pretty_print(my_in, 3)
# Same grouping, with a per-key line counter stored as element 0 of each list
my_in2 = loadfiletodictcounter(file_in, 26, 3)
pretty_print(my_in2, 3)
# Export the counted dictionary (element 0 of the returned tuple) as an importable module
py_format(my_in2[0], 'pydata.py')
dict_new = {
'ena' : 1,
'dyo' : 2
}
py_format(dict_new, 'pydata2.py')
#from pydata2 import allData
# Read the exported data back by importing the module that was just written
from pydata import allData
print(allData)
################################################################
# NAMED TUPLES
# Sometimes it is convenient to access the elements of a tuple by name rather than
# by index. This is possible with the collections.namedtuple() function. You first
# design a structure (a type name plus its field names) using namedtuple(), then you
# create instances of it. Like regular tuples, named tuples are immutable.
import collections

person = collections.namedtuple("Person", ["name", "surname", "age"])
persons = []
persons.append(person("Alain", "Delon", 32))
persons.append(person("Jean", "Gabin", 39))
# You can now access the first name of each tuple using the name attribute:
first_names = [x.name for x in persons]
##############################################################
# DEQUE - DOUBLE ENDED QUEUES
Double-ended queues, or deques, can be useful when you need to remove elements in the order in which
they were added. You can find the deque functions in the collections module.
>>> from collections import deque
>>> q = deque(range(5))
>>> q.append(5)
>>> q.appendleft(6)
>>> q
deque([6, 0, 1, 2, 3, 4, 5])
>>> q.pop()
5
>>> q.popleft()
6
>>> q.rotate(3)
>>> q
deque([2, 3, 4, 0, 1])
#The reason for the usefulness of the deque is that it allows appending and popping efficiently at the
#beginning (to the left), as opposed to lists. As a nice side effect, you can also rotate the elements
#(that is, shift them to the right or left, wrapping around the ends) efficiently. Deque objects also
#have extend and extendleft methods, with extend working like the corresponding list method, and
#extendleft working analogously to appendleft. Note that the elements in the iterable used in extendleft will
#appear in the deque in reverse order.
# Various uses
>>> from collections import deque
>>> d = deque('ghi') # make a new deque with three items
>>> for elem in d: # iterate over the deque's elements
... print elem.upper()
G
H
I
>>> d.append('j') # add a new entry to the right side
>>> d.appendleft('f') # add a new entry to the left side
>>> d # show the representation of the deque
deque(['f', 'g', 'h', 'i', 'j'])
>>> d.pop() # return and remove the rightmost item
'j'
>>> d.popleft() # return and remove the leftmost item
'f'
>>> list(d) # list the contents of the deque
['g', 'h', 'i']
>>> d[0] # peek at leftmost item
'g'
>>> d[-1] # peek at rightmost item
'i'
>>> list(reversed(d)) # list the contents of a deque in reverse
['i', 'h', 'g']
>>> 'h' in d # search the deque
True
>>> d.extend('jkl') # add multiple elements at once
>>> d
deque(['g', 'h', 'i', 'j', 'k', 'l'])
>>> d.rotate(1) # right rotation
>>> d
deque(['l', 'g', 'h', 'i', 'j', 'k'])
>>> d.rotate(-1) # left rotation
>>> d
deque(['g', 'h', 'i', 'j', 'k', 'l'])
>>> deque(reversed(d)) # make a new deque in reverse order
deque(['l', 'k', 'j', 'i', 'h', 'g'])
>>> d.clear() # empty the deque
>>> d.pop() # cannot pop from an empty deque
Traceback (most recent call last):
File "<pyshell#6>", line 1, in -toplevel-
d.pop()
IndexError: pop from an empty deque
>>> d.extendleft('abc') # extendleft() reverses the input order
>>> d
deque(['c', 'b', 'a'])
# Various implementations
# This section shows various approaches to working with deques.
# Bounded length deques provide functionality similar to the tail filter in Unix:
def tail(filename, n=10):
    '''Return the last n lines of a file.

    The lines (with their line endings) are returned as a deque bounded to
    n elements: the file is streamed through it, so only the final n lines
    are retained.
    '''
    from collections import deque

    # Close the file as soon as it has been consumed instead of leaking it
    with open(filename) as handle:
        return deque(handle, n)
# Another approach to using deques is to maintain a sequence of recently added elements by appending to the right and popping to the left:
def moving_average(iterable, n=3):
    """Yield the running mean of every n consecutive values of iterable.

    moving_average([40, 30, 50, 46, 39, 44]) --> 40.0 42.0 45.0 43.0
    http://en.wikipedia.org/wiki/Moving_average
    """
    stream = iter(iterable)
    # Window of n slots: a leading 0 placeholder followed by the first n-1 values
    window = deque([0])
    window.extend(itertools.islice(stream, n - 1))
    running_total = sum(window)
    for value in stream:
        # Slide the window: drop the oldest slot, take in the new value
        oldest = window.popleft()
        running_total += value - oldest
        window.append(value)
        yield running_total / float(n)
# The rotate() method provides a way to implement deque slicing and deletion.
# For example, a pure Python implementation of del d[n] relies on the rotate()
# method to position elements to be popped:
def delete_nth(d, n):
    """Remove the element at position n from deque d, in place."""
    shift = -n
    # Rotate so that element n sits at the left end, drop it, rotate back
    d.rotate(shift)
    d.popleft()
    d.rotate(-shift)
# To implement deque slicing, use a similar approach applying rotate() to bring
# a target element to the left side of the deque. Remove old entries with
# popleft(), add new entries with extend(), and then reverse the rotation. With
# minor variations on that approach, it is easy to implement Forth style stack
# manipulations such as dup, drop, swap, over, pick, rot, and roll.
###########################################################
# ORDERED DICT
# OrderedDict: dict subclass that remembers the order in which entries were
# added (plain dictionaries did not retain insertion order before Python 3.7)
# Use: We can use ordered dicts if we want dictionaries to remember order of entry.
# No order - standard use
>>> d = {"a":1,"b":2,"c":3,"d":4,"e":5}
>>> for k,v in d.items():
...     print(k,v)
...
a 1
b 2
c 3
e 5 # <-- as normal dictionaries are just the mapping,items are out of order
d 4
# Using ordered dict -> Now let’s do it using OrderedDict
>>> from collections import OrderedDict
>>> d = OrderedDict([("a",1),("b",2),("c",3),("d",4),("e",5)])
>>> for k,v in d.items():
...     print(k,v)
...
a 1
b 2
c 3
d 4
e 5
# Since an ordered dictionary remembers its insertion order, it can
# be used in conjunction with sorting to make a sorted dictionary:
>>> # regular unsorted dictionary
>>> d = {'banana': 3, 'apple': 4, 'pear': 1, 'orange': 2}
>>> # dictionary sorted by key
>>> OrderedDict(sorted(d.items(), key=lambda t: t[0]))
OrderedDict([('apple', 4), ('banana', 3), ('orange', 2), ('pear', 1)])
>>> # dictionary sorted by value
>>> OrderedDict(sorted(d.items(), key=lambda t: t[1]))
OrderedDict([('pear', 1), ('orange', 2), ('banana', 3), ('apple', 4)])
>>> # dictionary sorted by length of the key string
>>> OrderedDict(sorted(d.items(), key=lambda t: len(t[0])))
OrderedDict([('pear', 1), ('apple', 4), ('orange', 2), ('banana', 3)])
# The new sorted dictionaries maintain their sort order when entries are
# deleted. But when new keys are added, the keys are appended to the end
# and the sort is not maintained.
# It is also straight-forward to create an ordered dictionary variant that
# remembers the order the keys were last inserted. If a new entry overwrites
# an existing entry, the original insertion position is changed and moved to the end:
class LastUpdatedOrderedDict(OrderedDict):
    """Ordered dictionary that keeps keys in the order they were last set.

    Assigning to an existing key moves that key to the end.
    """

    def __setitem__(self, key, value):
        already_present = key in self
        if already_present:
            # Forget the old position so the key is re-inserted at the end
            del self[key]
        super(LastUpdatedOrderedDict, self).__setitem__(key, value)
# An ordered dictionary can be combined with the Counter class so that the counter
#remembers the order elements are first encountered:
class OrderedCounter(Counter, OrderedDict):
    """Counter that remembers the order in which elements are first seen."""

    def __repr__(self):
        # Show the counts through an OrderedDict so first-seen order is visible
        ordered_view = OrderedDict(self)
        return '{}({!r})'.format(type(self).__name__, ordered_view)

    def __reduce__(self):
        # Pickle support: rebuild from an OrderedDict snapshot of the counts
        return (type(self), (OrderedDict(self),))
#####################################################
# DEFAULT DICT
# defaultdict: dict subclass that calls a factory function to supply missing values
# A defaultdict will never raise a KeyError. Any key that doesn't exist gets the
# value returned by the default factory. So the key takeaway is that it doesn't throw a
# KeyError; instead it initializes the missing key with the default value produced by
# the factory we supplied when we created the default dictionary
# Sample use - creates an empty dictionary in which every missing key starts out as a new empty list
dict_out = collections.defaultdict(list)
# Standard use and problem with keyerror
>>> from collections import defaultdict
>>> mydict = {}
>>> mydict[1]
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
KeyError: 1
# Same with defaultdict
>>> mydict = defaultdict(object)
>>> mydict[1]
<object object at 0x101217110>
# Using list as the default_factory, it is easy to group a sequence of
#key-value pairs into a dictionary of lists:
>>> s = [('yellow', 1), ('blue', 2), ('yellow', 3), ('blue', 4), ('red', 1)]
>>> d = defaultdict(list)
>>> for k, v in s:
... d[k].append(v)
...
>>> d.items()
[('blue', [2, 4]), ('red', [1]), ('yellow', [1, 3])]
# When each key is encountered for the first time, it is not already in
# the mapping; so an entry is automatically created using the default_factory
# function which returns an empty list. The list.append() operation then
# attaches the value to the new list. When keys are encountered again, the
# look-up proceeds normally (returning the list for that key) and the
# list.append() operation adds another value to the list. This technique is
# simpler and faster than an equivalent technique using dict.setdefault():
>>> d = {}
>>> for k, v in s:
... d.setdefault(k, []).append(v)
...
>>> d.items()
[('blue', [2, 4]), ('red', [1]), ('yellow', [1, 3])]
# Setting the default_factory to int makes the defaultdict useful for counting
#(like a bag or multiset in other languages):
>>> s = 'mississippi'
>>> d = defaultdict(int)
>>> for k in s:
... d[k] += 1
...
>>> d.items()
[('i', 4), ('p', 2), ('s', 4), ('m', 1)]
# When a letter is first encountered, it is missing from the mapping, so the
# default_factory function calls int() to supply a default count of zero. The
# increment operation then builds up the count for each letter.
# The function int() which always returns zero is just a special case of
# constant functions. A faster and more flexible way to create constant
# functions is to use itertools.repeat() which can supply any constant value
# (not just zero):
>>> def constant_factory(value):
... return itertools.repeat(value).next
>>> d = defaultdict(constant_factory('<missing>'))
>>> d.update(name='John', action='ran')
>>> '%(name)s %(action)s to %(object)s' % d
'John ran to <missing>'
# Setting the default_factory to set makes the defaultdict useful for
# building a dictionary of sets:
>>> s = [('red', 1), ('blue', 2), ('red', 3), ('blue', 4), ('red', 1), ('blue', 4)]
>>> d = defaultdict(set)
>>> for k, v in s:
... d[k].add(v)
...
>>> d.items()
[('blue', set([2, 4])), ('red', set([1, 3]))]
###################################################
# COUNTER
# Counter is a dict subclass for counting hashable objects.
# We use it to quickly create counters in dictionaries.
##
# Necessary module
from collections import Counter
##
# Uses : quickly count frequencies in list
# Counter(mylist) maps each distinct element of mylist to the number of times it occurs
mylist = [1,2,1,3,1,4]
print(Counter(mylist))
>> Counter({1: 3, 2: 1, 3: 1, 4: 1})
##
# Uses: quickly count frequencies in a string
>>> mystr = "mynasdbdadddndashhhqas"
>>> Counter(mystr)
Counter({'d': 6, 'a': 4, 's': 3, 'h': 3, 'n': 2, 'm': 1, 'y': 1, 'b': 1, 'q': 1})
##
# Uses: count word frequencies
>>> mysent = "Welcome to the world of Python Python is an awesome language"
>>> mysent = mysent.split()
>>> Counter(mysent)
Counter({'Python': 2, 'Welcome': 1, 'to': 1, 'the': 1, 'world': 1, 'of': 1, 'is': 1, 'an': 1, 'awesome': 1, 'language': 1})
##
# Show X most common words
>>> mysent = "Welcome to the world of Python Python is an awesome language to learn"
>>> mysent = mysent.split()
>>> x = Counter(mysent)
>>> x.most_common(2)
[('to', 2), ('Python', 2)]
##
# Tally occurrences of words in a list
>>> sum(x.values())
13
##
# Tally occurrences of words in a list
>>> cnt = Counter()
>>> for word in ['red', 'blue', 'red', 'green', 'blue', 'blue']:
... cnt[word] += 1
>>> cnt
Counter({'blue': 3, 'red': 2, 'green': 1})
##
# Find the ten most common words in Hamlet
>>> import re
>>> words = re.findall(r'\w+', open('hamlet.txt').read().lower())
>>> Counter(words).most_common(10)
[('the', 1143), ('and', 966), ('to', 762), ('of', 669), ('i', 631),
('you', 554), ('a', 546), ('my', 514), ('hamlet', 471), ('in', 451)]
##
# Counter objects have a dictionary interface except that they return
#a zero count for missing items instead of raising a KeyError:
>>> c = Counter(['eggs', 'ham'])
>>> c['bacon'] # count of a missing element is zero
0
##
# Using "Elements" -> elements()
# Return an iterator over elements repeating each as many times as its count.
# Elements are returned in arbitrary order. If an element’s count is less than
#one, elements() will ignore it.
>>> c = Counter(a=4, b=2, c=0, d=-2)
>>> list(c.elements())
['a', 'a', 'a', 'a', 'b', 'b']
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 30 16:56:00 2017
@author: P.Doulgeridis
"""
import os
import sys
import time
# Input path comes from the first command-line argument
file_in = sys.argv[1]
# Three output paths derived from the input path by appending a suffix
file_ot = str(file_in) + ".proc"
file_ot2 = str(file_in) + ".proc2"
file_ot3 = str(file_in) + ".proc3"
def filetoliststrip(file):
    '''
    Function: filetoliststrip
    Description: Reads a text file and returns its lines as a list, with
                 leading and trailing whitespace (including the line
                 ending) removed from every line
    Input: File (path of the file to read; converted with str())
    Output: List of stripped lines, in file order
    Usage: print (filetoliststrip("C:/Users/p.doulgeridis/Desktop/testpy.txt"))
    Notes: On Windows, write the path with forward slashes or with doubled
           backslashes
    '''
    file_in = str(file)
    # 'with' guarantees the handle is closed once the lines are read
    with open(file_in, 'r') as handle:
        return [line.strip() for line in handle]
# Maps key substring -> [occurrence_count, line1, line2, ...]
dict_in = dict()
seen = []
fileinlist = filetoliststrip(file_in)

# Group the (already stripped) lines by the key found in columns 10..68
for line in fileinlist:
    keyf = line[10:69]
    if keyf not in dict_in:
        # Slot 0 holds the number of lines seen for this key
        dict_in[keyf] = [0]
    dict_in[keyf][0] += 1
    dict_in[keyf].append(line)

# Write one representative line per key to the file that matches how often
# the key occurred. The input lines were stripped, so the line terminator is
# added back here; otherwise all records would end up on a single line.
with open(file_ot, 'w') as out_file, \
        open(file_ot2, 'w') as out_file2, \
        open(file_ot3, 'w') as out_file3:
    for j in dict_in:
        count = dict_in[j][0]
        if count < 2:
            # Key occurs once: its only line
            out_file.write(dict_in[j][1] + "\n")
        elif count == 2:
            # Key occurs twice: the second line
            out_file2.write(dict_in[j][2] + "\n")
        else:
            # Key occurs three or more times: the third line
            out_file3.write(dict_in[j][3] + "\n")
# source: webpy
def dictreverse(mapping):
    """
    Function: dictreverse
    Description: Builds a new dictionary in which every value of the
                 input becomes a key and every key becomes its value
    Input: dictionary (its values must be hashable)
    Output: new dict with swapped key/values; the input is not modified
    Usage : a = dictreverse(c)
    Notes : When several keys share a value, the last one iterated wins.
        >>> dictreverse({1: 2, 3: 4})
        {2: 1, 4: 3}
    """
    return {content: label for label, content in mapping.items()}
# Demo: swap keys and values of a small dictionary.
# print() with parentheses runs on both Python 2 and Python 3.
c = { 'a': 1 , 'b' : 2 , 'c' : 3 }
print(dictreverse(c))
#The del statement removes an element:
del d[key]
#However, this mutates the existing dictionary so the contents of the dictionary changes for anybody else who has a reference to the same instance. To return a new dictionary, make a copy of the dictionary:
def removekey(d, key):
    """Return a shallow copy of d without key; d itself is left untouched.

    Raises KeyError when key is not present.
    """
    trimmed = dict(d)
    # pop() without a default raises KeyError for a missing key, like del
    trimmed.pop(key)
    return trimmed
#The dict() constructor makes a shallow copy. To make a deep copy, see the copy module.
def pretty_print(b):
    '''
    Function: pretty_print
    Description : Prints a dictionary one entry per line, between braces
    Input : Dictionary
    Output: STDOUT (the function itself returns None)
    Usage(print) : pretty_print(b)
    Notes : Only prints on screen. Every entry is shown as
            <tab>key : value
    '''
    print("{ ")
    for label, content in b.items():
        print("\t" + " : ".join((str(label), str(content))))
    print("}\n")
#Dictionaries are a convenient way to store data for later retrieval by name (key). Keys must be unique,
#immutable objects, and are typically strings. The values in a dictionary can be anything. For many
#applications the values are simple types such as integers and strings. It gets more interesting when
#the values in a dictionary are collections (lists, dicts, etc.) In this case, the value (an empty list or dict)
#must be initialized the first time a given key is used. While this is relatively easy to do manually,
#the defaultdict type automates and simplifies these kinds of operations.
#A defaultdict works exactly like a normal dict, but it is initialized with a function (“default factory”)
#that takes no arguments and provides the default value for a nonexistent key.
#A defaultdict will never raise a KeyError. Any key that does not exist gets the value returned by the default factory.
>>> from collections import defaultdict
>>> ice_cream = defaultdict(lambda: 'Vanilla')
>>>
>>> ice_cream = defaultdict(lambda: 'Vanilla')
>>> ice_cream['Sarah'] = 'Chunky Monkey'
>>> ice_cream['Abdul'] = 'Butter Pecan'
>>> print ice_cream['Sarah']
Chunky Monkey
>>> print ice_cream['Joe']
Vanilla
>>>
#Be sure to pass the function object to defaultdict(). Do not call the function, i.e. defaultdict(func), not defaultdict(func()).
#In the following example, a defaultdict is used for counting. The default factory is int, which in turn has a default value of zero.
#(Note: “lambda: 0″ would also work in this situation). For each food in the list, the value is incremented by one where the key is the food.
#We do not need to make sure the food is already a key – it will use the default value of zero.
>>> from collections import defaultdict
>>> food_list = 'spam spam spam spam spam spam eggs spam'.split()
>>> food_count = defaultdict(int) # default value of int is 0
>>> for food in food_list:
...     food_count[food] += 1 # increment element's value by 1
...
>>> food_count
defaultdict(<class 'int'>, {'spam': 7, 'eggs': 1})
>>>
#In the next example, we start with a list of states and cities. We want to build a dictionary
#where the keys are the state abbreviations and the values are lists of all cities for that state.
#To build this dictionary of lists, we use a defaultdict with a default factory of list. A new list
#is created for each new key.
>>> from collections import defaultdict
>>> city_list = [('TX','Austin'), ('TX','Houston'), ('NY','Albany'), ('NY', 'Syracuse'), ('NY', 'Buffalo'), ('NY', 'Rochester'), ('TX', 'Dallas'), ('CA','Sacramento'), ('CA', 'Palo Alto'), ('GA', 'Atlanta')]
>>>
>>> cities_by_state = defaultdict(list)
>>> for state, city in city_list:
...     cities_by_state[state].append(city)
...
>>> for state, cities in cities_by_state.items():
...     print(state, ', '.join(cities))
...
#TX Austin, Houston, Dallas
#NY Albany, Syracuse, Buffalo, Rochester
#CA Sacramento, Palo Alto
#GA Atlanta
#In conclusion, whenever you need a dictionary, and each element’s value should start with a default value, use a defaultdict.
def keywithmaxval(d):
    """Return the key of `d` whose value is the largest.

    When several keys share the maximum value, the first of them in the
    dictionary's iteration order is returned.

    Raises:
        ValueError: if `d` is empty (there is no maximum to pick).
    """
    # max() walks the keys once and ranks each by its value, so there is no
    # need to build separate key/value lists and scan them again with index().
    return max(d, key=d.get)
def filter_dict_keys(mydict, mylist):
    '''
    Function: filter_dict_keys
    Description: Filters out keys included in exclusion list
    Input: Dictionary, List of keys to be excluded
    Output: New dict, without the excluded keys (the input dict is not modified)
    Usage(print): print (filter_dict_keys(mydict,mylist))
    Usage(assign): b = filter_dict_keys(mydict,mylist)
    '''
    # Keep only the entries whose key is absent from the exclusion list.
    return {key: value for key, value in mydict.items() if key not in mylist}


a = {1: 2, 3: 4, 5: 6}
b = [1, 3]
# The demo must call filter_dict_keys; no function named filter_dict exists.
print(filter_dict_keys(a, b))
###############################################
# EXPANDED VERSION WITH MODES
def filter_dict_keys(mydict, mylist, mode=1):
    '''
    Function: filter_dict_keys
    Description: Filters a dictionary by key, either dropping or keeping the
                 keys named in a list
    Input: Dictionary, List of keys, mode
    Output: New dict (the input dict is not modified)
    Usage(print): print (filter_dict_keys(mydict,mylist))
    Usage(assign): b = filter_dict_keys(mydict,mylist)
    Modes : 1 (Exclude listed keys - Default) / any other value, e.g. 0
            (Include only listed keys)
    '''
    if mode == 1:
        # Exclusion: keep entries whose key is NOT in the list.
        return {key: value for key, value in mydict.items() if key not in mylist}
    # Inclusion: keep entries whose key IS in the list.
    return {key: value for key, value in mydict.items() if key in mylist}


a = {1: 2, 3: 4, 5: 6}
b = [1, 3]
print(filter_dict_keys(a, b, 0))
def filter_array_dict(dict, store):
    '''
    Function: filter_array_dict
    Description: Filters out entries whose value is included in exclusion list
    Input: Dictionary, collection of values to be excluded
    Output: New dict, without the entries whose value appears in store
    Usage(print): print (filter_array_dict(dict, store))
    Usage(assign): b = filter_array_dict(dict, store)
    Notes: Does not edit in place
    '''
    # NOTE: the first parameter shadows the builtin `dict`; the name is kept
    # so that existing keyword callers keep working.
    # Change "not in" to "in" for the reverse effect.
    return {key: value for key, value in dict.items() if value not in store}


dicta = {1: 2, 3: 4, 5: 6}
lista = [2, 4]
# print() as a function: the bare print statement is a SyntaxError on Python 3.
print(filter_array_dict(dicta, lista))
haystack = ["Zig", "Zag", "Wally", "Ronald", "Bush", "Bush", "Krusty", "Charlie", "Bush", "Bozo"]
b = ["Zig", "Bush"]


def find_index(lista, listb):
    '''
    Function: find_index
    Description: Finds every position at which each needle occurs in a list
    Input: List to search (haystack), List of needles
    Output: dict mapping each needle to the list of indexes where it occurs
            in the haystack; a needle that never occurs maps to an empty list
    Usage(print): print (find_index(haystack, needles))
    Usage(assign): a = find_index(haystack, needles)
    Notes: Prints "<needle> is not in haystack" for every needle without a
           match. Needles must be hashable (they become dict keys).
    '''
    dicta = dict()
    for needle in listb:
        if needle not in dicta:
            # list.index() would stop at the first hit; enumerate() records
            # every occurrence (e.g. "Bush" at 4, 5 and 8 in the demo data).
            dicta[needle] = [pos for pos, item in enumerate(lista) if item == needle]
        if not dicta[needle]:
            print(needle, "is not in haystack")
    return dicta


print(find_index(haystack, b))
b = {'a': 3, 'b': 9, 'c': 88, 'd': 3}


def dictfreq(b):
    '''
    Function: dictfreq
    Description: Groups the keys of a dictionary by the value they hold
    Input: dict
    Output: dictionary with previous values as keys, each mapped to the list
            of previous keys that held that value
    Usage: a = dictfreq(dict)
    Notes: Does not edit in place. Values must be hashable because they
           become the keys of the result.
    '''
    counter = dict()
    for k, v in b.items():
        # setdefault() creates the list the first time a value is seen and
        # returns the existing list afterwards.
        counter.setdefault(v, []).append(k)
    return counter


print(dictfreq(b))
def count_smaller_dict(dict, limit):
    '''
    Function: count_smaller_dict
    Description: Counts keys with values smaller than limit
    Input: dict, limit
    Output: integer
    Usage: a = count_smaller_dict(dict,limit)
    Notes: Does not edit in place. Values equal to limit are not counted.
    '''
    # NOTE: the first parameter shadows the builtin `dict`; the name is kept
    # so that existing keyword callers keep working.
    return sum(1 for value in dict.values() if value < limit)
def count_larger_dict(dict, limit):
    '''
    Function: count_larger_dict
    Description: Counts keys with values larger than limit
    Input: dict, limit
    Output: integer
    Usage: a = count_larger_dict(dict,limit)
    Notes: Does not edit in place. Values equal to limit are not counted.
    '''
    # NOTE: the first parameter shadows the builtin `dict`; the name is kept
    # so that existing keyword callers keep working.
    return sum(1 for value in dict.values() if value > limit)
def filter_smaller_dict(dict, limit):
    '''
    Function: filter_smaller_dict
    Description: Keeps only the entries whose value is smaller than limit
    Input: dict, limit
    Output: New dict with the entries whose value is below limit
    Usage: a = filter_smaller_dict(dict,limit)
    Notes: Does not edit in place. Values equal to limit are dropped.
    '''
    # NOTE: the first parameter shadows the builtin `dict`; the name is kept
    # so that existing keyword callers keep working.
    return {key: value for key, value in dict.items() if value < limit}
def filter_larger_dict(dict, limit):
    '''
    Function: filter_larger_dict
    Description: Keeps only the entries whose value is larger than limit
    Input: dict, limit
    Output: New dict with the entries whose value is above limit
    Usage: a = filter_larger_dict(dict,limit)
    Notes: Does not edit in place. Values equal to limit are dropped.
    '''
    # NOTE: the first parameter shadows the builtin `dict`; the name is kept
    # so that existing keyword callers keep working.
    return {key: value for key, value in dict.items() if value > limit}
以上是关于python Python.DataTypes.Dictionaries的主要内容,如果未能解决你的问题,请参考以下文章