python Python.DataTypes.Dictionaries

Posted 2021-05-09

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了python Python.DataTypes.Dictionaries相关的知识，希望对你有一定的参考价值。

def sort_dict_lists_adv(dict_in, index_in, mode="key", reverse=False):
    '''
    Author:     p.doulgeridis
    Name:       sort_dict_lists_adv
    Function:   sort_dict_lists_adv(dict_in, index_in, mode="key", reverse=False)
    Purpose:    Sort the items of a dictionary whose values are indexable
                sequences (e.g. lists).
    Input:      * <Dictionary>dict_in : mapping of key -> sequence
                * <Integer>index_in   : position inside each value to sort on
                                        (only used when mode="value")
                * mode    [ "key" | "value" ]
                      "key"   -> order by the dictionary key
                      "value" -> order by value[index_in]
                * reverse [ True | False ] : True sorts in descending order
    Output:     New list of (key, value) tuples in the requested order.
                Returns None when mode is neither "key" nor "value".
    Usage:      print(sort_dict_lists_adv(xs3, 2, mode="value", reverse=False))
    Notes:      The function returns a list of tuple key value pairs. Since
                Python 3.7 a plain dict keeps insertion order, so dict(result)
                preserves the sorted order; on older interpreters use
                'OrderedDict' from the 'collections' module instead.
    '''
    def _by_key(item):
        # item is a (key, value) pair; element 0 is the dictionary key.
        return item[0]

    def _by_value_element(item):
        # Element 1 is the value sequence; pick the requested position in it.
        return item[1][index_in]

    if mode == "key":
        # Bug fix: this mode used to sort on item[1] (the value), which
        # contradicted both the mode name and sort_dict_simple().
        sort_key = _by_key
    elif mode == "value":
        sort_key = _by_value_element
    else:
        # Unknown mode: keep the historical "no result" behaviour.
        return None

    return sorted(dict_in.items(), key=sort_key, reverse=bool(reverse))

import operator

# Sample data: a flat dict and dicts whose values are lists of 2, 3 and 4 items.
xs = {'a': 4, 'b': 3, 'c': 2, 'd': 1}
xs2 = {'a': [4, 1], 'b': [3, 2], 'c': [2, 3], 'd': [1, 5]}
xs3 = {'a': [4, 1, 2], 'b': [3, 2, 1], 'c': [2, 3, 4], 'd': [1, 5, 3]}
xs4 = {'a': [4, 1, 2, 1], 'b': [3, 2, 4, 6], 'c': [2, 3, 3, 4], 'd': [1, 5, 6, 7]}

# Flat dict, ascending: first ordered by value, then ordered by key.
print(dict(sorted(xs.items(), key=operator.itemgetter(1))))
print(dict(sorted(xs.items(), key=operator.itemgetter(0))))

# Flat dict ordered by value, descending.
print(dict(sorted(xs.items(), key=operator.itemgetter(1), reverse=True)))

# Same value ordering again, this time printed as a list of (key, value) pairs.
print(sorted(xs.items(), key=lambda pair: pair[1]))

# ... and in descending order.
print(sorted(xs.items(), key=lambda pair: pair[1], reverse=True))

# Dict of lists: order by the second element of each list, descending.
print(sorted(xs2.items(), key=lambda pair: pair[1][1], reverse=True))

# Dict of lists: order by the third element of each list, descending.
print(dict(sorted(xs3.items(), key=lambda pair: pair[1][2], reverse=True)))

# Dict of lists: order by the fourth element of each list, descending.
print(sorted(xs4.items(), key=lambda pair: pair[1][3], reverse=True))

# Dict of lists: order by the fourth element of each list, ascending.
print(sorted(xs4.items(), key=lambda pair: pair[1][3], reverse=False))





def sort_dict_lists_adv(dict_in, index_in, mode="key", reverse=False):
    '''
    Author:     p.doulgeridis
    Name:       sort_dict_lists_adv
    Function:   sort_dict_lists_adv(dict_in, index_in, mode="key", reverse=False)
    Purpose:    Sort the items of a dictionary whose values are indexable
                sequences (e.g. lists).
    Input:      * <Dictionary>dict_in : mapping of key -> sequence
                * <Integer>index_in   : position inside each value to sort on
                                        (only used when mode="value")
                * mode    [ "key" | "value" ]
                      "key"   -> order by the dictionary key
                      "value" -> order by value[index_in]
                * reverse [ True | False ] : True sorts in descending order
    Output:     New list of (key, value) tuples in the requested order.
                Returns None when mode is neither "key" nor "value".
    Usage:      print(sort_dict_lists_adv(xs3, 2, mode="value", reverse=False))
    Notes:      The function returns a list of tuple key value pairs. Since
                Python 3.7 a plain dict keeps insertion order, so dict(result)
                preserves the sorted order; on older interpreters use
                'OrderedDict' from the 'collections' module instead.
    '''
    def _by_key(item):
        # item is a (key, value) pair; element 0 is the dictionary key.
        return item[0]

    def _by_value_element(item):
        # Element 1 is the value sequence; pick the requested position in it.
        return item[1][index_in]

    if mode == "key":
        # Bug fix: this mode used to sort on item[1] (the value), which
        # contradicted both the mode name and sort_dict_simple().
        sort_key = _by_key
    elif mode == "value":
        sort_key = _by_value_element
    else:
        # Unknown mode: keep the historical "no result" behaviour.
        return None

    return sorted(dict_in.items(), key=sort_key, reverse=bool(reverse))

def sort_dict_simple(dict_in, mode="key", reverse=False):
    '''
    Author:    p.doulgeridis
    Name:      sort_dict_simple
    Function:  sort_dict_simple(dict_in, mode="key", reverse=False)
    Purpose:   Return the items of a flat dictionary ordered by key or by value.
    Input:      * <Dictionary>dict_in
                * mode    [ "key" | "value" ] : which half of each pair to sort on
                * reverse [ True | False ]    : True sorts in descending order
    Output:     sorted list of (key, value) tuples; None when 'reverse' is
                neither equal to True nor to False, or when 'mode' is not
                one of the two supported names.
    Usage:      print(sort_dict_simple(xs, mode="value", reverse=False))
    Notes:      The result is a list, not a dictionary; wrap it in dict() (or
                collections.OrderedDict) to get a mapping back.
    '''
    # Equality (not truthiness) is used on purpose so that values such as
    # "yes" are rejected exactly like before.
    if reverse == False:
        descending = False
    elif reverse == True:
        descending = True
    else:
        return None

    # Position inside each (key, value) pair that drives the ordering.
    if mode == "key":
        column = 0
    elif mode == "value":
        column = 1
    else:
        return None

    pairs = list(dict_in.items())
    pairs.sort(key=lambda pair: pair[column], reverse=descending)
    return pairs
    
# Visual separator in the script's console output.
print("------------------")

def sort_dict_simple_v2(dict_in, mode="key", reverse=False):
    '''
    Author:     p.doulgeridis
    Name:       sort_dict_simple_v2
    Function:   sort_dict_simple_v2(dict_in, mode="key", reverse=False)
    Purpose:    Same job as sort_dict_simple, implemented with
                operator.itemgetter and reporting sort failures as a message.
    Input:      * <Dictionary>dict_in
                * mode    [ "key" | "value" ] : which half of each pair to sort on
                * reverse [ True | False ]    : True sorts in descending order
    Output:     * sorted list of (key, value) tuples, or
                * the string "Could not sort dictionary" when the items cannot
                  be compared or dict_in has no items() method, or
                * None when mode is neither "key" nor "value".
    Usage:      print(sort_dict_simple_v2(xs, mode="value", reverse=True))
    Notes:      The result is a list, not a dictionary; wrap it in dict() (or
                collections.OrderedDict) to get a mapping back.
                Requires the 'operator' module to be imported by the file.
    '''
    if mode == "key":
        column = 0
    elif mode == "value":
        column = 1
    else:
        return None

    # Previously only the ascending branch was protected (with a bare
    # 'except:'), so the descending branch raised where the ascending one
    # returned the message. Both directions now share one narrow handler.
    try:
        return sorted(dict_in.items(),
                      key=operator.itemgetter(column),
                      reverse=bool(reverse))
    except (TypeError, AttributeError):
        return "Could not sort dictionary"




# Demo calls on the sample dictionaries defined earlier in the script.
print(sort_dict_simple(xs, mode="key", reverse=True))     # by key, descending
print(sort_dict_simple(xs, mode="key", reverse=False))    # by key, ascending
print(sort_dict_simple_v2(xs, mode="value", reverse=True))  # by value, descending
print(sort_dict_simple(xs, mode="value", reverse=False))  # by value, ascending
print(sort_dict_lists_adv(xs3, 2, mode="value", reverse=False))  # by third list element, ascending




def twoliststozip(list1, list2):
    '''
    Author: p.doulgeridis
    Name: twoliststozip
    Function: twoliststozip(list1, list2)
    Input:    * <list>list1 : items used as dictionary keys
              * <list>list2 : items used as dictionary values
    Output: Dictionary mapping list1[i] -> list2[i], or the string
            "Error: Input lists have different lengths" when the two
            inputs do not have the same length.
    Usage: b = twoliststozip(list1, list2)
    Notes: list1 and list2 need to be of the same size. Duplicate keys in
           list1 keep the value of their last occurrence.
    '''
    if len(list1) != len(list2):
        return "Error: Input lists have different lengths"

    # Bug fix: this return used to be indented under the 'if' above, after
    # its own return, so it was unreachable and the function yielded None
    # for valid input.
    return dict(zip(list1, list2))


def twoliststozipv2(list1, list2):
    '''
    Author: p.doulgeridis
    Name: twoliststozipv2
    Function: twoliststozipv2(list1, list2)
    Input:    * <list>list1 : items used as dictionary keys
              * <list>list2 : items used as dictionary values
    Output: Dictionary mapping list1[i] -> list2[i], or an error string
            when the two inputs differ in length.
    Usage: b = twoliststozipv2(list1, list2)
    Notes: list1 and list2 need to be of the same size.
    '''
    same_size = len(list1) == len(list2)
    if same_size:
        # Pair the items up positionally and let dict() consume the pairs.
        return dict(zip(list1, list2))

    return "Error: Input lists have different lengths"


def twolisttozip_py2(list1, list2):
    '''
    Author: p.doulgeridis
    Name: twolisttozip_py2
    Function: twolisttozip_py2(list1, list2)
    Input:    * <list>list1 : items used as dictionary keys
              * <list>list2 : items used as dictionary values
    Output: Dictionary mapping list1[i] -> list2[i], or an error string
            when the two inputs differ in length.
    Usage: b = twolisttozip_py2(list1, list2)
    Notes: list1 and list2 need to be of the same size.
           Tries itertools.izip first and falls back to the builtin zip when
           that import is not available.
    '''
    if len(list1) != len(list2):
        return "Error: Input lists have different lengths"

    try:
        from itertools import izip as pair_up
    except ImportError:
        pair_up = zip

    mapping = dict(pair_up(list1, list2))
    return mapping

    
def twoliststozip_dictcomp(list1, list2):
    '''
    Author: p.doulgeridis
    Name: twoliststozip_dictcomp
    Function: twoliststozip_dictcomp(list1, list2)
    Input:    * <list>list1 : items used as dictionary keys
              * <list>list2 : items used as dictionary values
    Output: Dictionary mapping list1[i] -> list2[i], or the string
            "Error: Input lists have different lengths" when the two
            inputs do not have the same length.
    Usage: b = twoliststozip_dictcomp(list1, list2)
    Notes: list1 and list2 need to be of the same size.
           Built with a dictionary comprehension.
    '''
    if len(list1) != len(list2):
        return "Error: Input lists have different lengths"

    # Iterate the two lists in lockstep instead of indexing via range(len()).
    return {key: value for key, value in zip(list1, list2)}
    
# Demo input: 'a' holds four items and 'b' only three, so the lengths differ.
a = [ "a", "b", "c", "d" ]
b = [ 1, 2, 3 ]


# Every helper compares len(list1) with len(list2) before building the
# dictionary; with these inputs that length check fails in each call.
print(twoliststozip(a, b))
print(twoliststozipv2(a, b))
print(twolisttozip_py2(a, b))
print(twoliststozip_dictcomp(a, b))

Functions on dictionaries:
  
  1.  pretty_print                    : Prints a nice, readable version of the dictionary
  2.  filter_dict_keys                : Filters out keys from a dictionary, based on list provided
  3.  key_max_value                   : Return dictionary key that contains max value
  4.  remove_key                      : Remove a key from a dictionary
  5.  count_smaller                   : Count how many keys have value smaller than limit
  6.  count_larger                    : Count how many keys have value bigger than limit
  7.  filter_smaller                  : Filter keys with values smaller than limit
  8.  filter_larger                   : Filter keys with values larger than limit
  9.  filter_array_dict               : Filter keys with values in array
  10. dict_frequency                  : Counts frequency of keys per value in dict
  11. reverse_dict                    : Swaps values and keys of dictionary
  12. dict_index_search               : Searches for indexes, stores in dictionary
  13. MOD_defaultdict.py              : Tutorial to default dict module
  14. Key_Counter.py                  : Counts frequencies and outputs to different files
  15. MOD_collections_Counter.py      : (Includes default dict) - Optimised collections 
  16. MOD_collections_DefaultDict.py  : Optimised default dictionary structures
  17. MOD_collections_OrderedDict.py  : Optimised ordered dictionary structures
  18. MOD_collections_DeQueue.py      : Optimised deque (double-ended queue) structures
  19. MOD_collections_NamedTuple.py   : Optimised named tuple structures
  20. loadfiletodict.py               : Load text file to dictionary methods
  21. TwoListsToDict.py               : Maps two lists into a dictionary
  22. DictSorting.py                  : Sorting dictionaries by key or value
  23. DictSortingXD.py                : Sorting dictionaries of LISTS by key or value









import os
import sys





# Path of the input text file, taken from the first command-line argument.
file_in = sys.argv[1]


def loadfiletodict(file_in, start, length, strip="yes"):
    '''
    Name:           loadfiletodict

    Function:       loadfiletodict(file_in, start, length, strip="yes")

    Description:    Loads a text file into a dictionary. A fixed-position
                    substring of every line is used as the key, and each key
                    holds the list of lines that share it:
                        { <key>: [ <line1>, <line2>, ... ] }

    Input:          1. file_in : Path of the input text file. It is opened
                                 in text mode with the platform default
                                 encoding.
                    2. start   : 1-based position of the first key character
                                 (anything int() accepts).
                    3. length  : Number of characters in the key
                                 (anything int() accepts).
                    4. strip   : "yes" (default) removes trailing whitespace,
                                 including the newline, from every line before
                                 the key is cut; any other value keeps the
                                 line untouched.

    Output:         (<dictionary>, <int>)
                    1. <dictionary> : collections.defaultdict(list) with the
                                      substrings as keys.
                    2. <int>        : number of distinct keys.
                    When file_in is not an existing file the tuple
                    ("ERROR: Reading dictionary.", 0) is returned instead.

    Raises:         ValueError / TypeError if start or length cannot be
                    converted with int().

    Usage:          my_in = loadfiletodict(file_in, 26, 3) (default)
                    my_in = loadfiletodict(file_in, 26, 3, strip="no") (non stripping)

    Required:       collections, os (both part of the standard library; no
                    installation needed)

    Notes:          The result is a <defaultdict>, not a plain <dict>: looking
                    up a missing key silently creates an empty list. Convert
                    with dict(result) when that is not wanted.
                    'start' is not validated; values below 1 produce negative
                    slice offsets.
    '''
    import collections
    import os

    # A missing input is reported through the return value, as before.
    if not os.path.isfile(file_in):
        return ("ERROR: Reading dictionary.", 0)

    # The key window is identical for every line, so work it out once.
    # 'start' is 1-based, Python slices are 0-based.
    start_py = int(start) - 1
    end_py = start_py + int(length)
    strip_lines = (strip == "yes")

    dict_out = collections.defaultdict(list)

    with open(file_in, 'r') as f:
        for line in f:
            if strip_lines:
                line = line.rstrip()
            dict_out[line[start_py:end_py]].append(line)

    return (dict_out, len(dict_out))
    


def loadfiletodictcounter(file_in, start, length, strip="yes"):
    '''
    Name:           loadfiletodictcounter

    Function:       loadfiletodictcounter(file_in, start, length, strip="yes")

    Description:    Loads a text file into a dictionary. A fixed-position
                    substring of every line is used as the key. Each key
                    holds a list whose first element is the number of lines
                    seen for that key, followed by the lines themselves:
                        { <key>: [ <counter>, <line1>, <line2>, ... ] }

    Input:          1. file_in : Path of the input text file. It is opened
                                 in text mode with the platform default
                                 encoding.
                    2. start   : 1-based position of the first key character
                                 (anything int() accepts).
                    3. length  : Number of characters in the key
                                 (anything int() accepts).
                    4. strip   : "yes" (default) removes trailing whitespace,
                                 including the newline, from every line before
                                 the key is cut; any other value keeps the
                                 line untouched.

    Output:         (<dictionary>, <int>)
                    1. <dictionary> : collections.defaultdict(list) with the
                                      substrings as keys.
                    2. <int>        : number of distinct keys.
                    When file_in is not an existing file the tuple
                    ("ERROR: Reading dictionary.", 0) is returned instead.

    Raises:         ValueError / TypeError if start or length cannot be
                    converted with int().

    Usage:          my_in = loadfiletodictcounter(file_in, 26, 3) (default)
                    my_in = loadfiletodictcounter(file_in, 26, 3, strip="no") (non stripping)

    Required:       collections, os (both part of the standard library; no
                    installation needed)

    Notes:          The result is a <defaultdict>, not a plain <dict>: looking
                    up a missing key silently creates an empty list. Convert
                    with dict(result) when that is not wanted.

                    * Differs from loadfiletodict by the leading per-key
                      line counter.
    '''
    import collections
    import os

    # A missing input is reported through the return value, as before.
    if not os.path.isfile(file_in):
        return ("ERROR: Reading dictionary.", 0)

    # The key window is identical for every line, so work it out once.
    # 'start' is 1-based, Python slices are 0-based.
    start_py = int(start) - 1
    end_py = start_py + int(length)
    strip_lines = (strip == "yes")

    dict_out = collections.defaultdict(list)

    with open(file_in, 'r') as f:
        for line in f:
            if strip_lines:
                line = line.rstrip()

            bucket = dict_out[line[start_py:end_py]]
            if not bucket:
                # First line for this key: slot 0 is reserved for the counter.
                bucket.append(0)
            bucket[0] += 1
            bucket.append(line)

    return (dict_out, len(dict_out))


def dicttofilepy(dict_in, file_ot):
    '''
    Name: dicttofilepy
    Description: Writes a file that contains the definition of
               <dict_in> as Python source, ready to be imported
               (see Notes and sample code).
    Input: <dict_in>  : dictionary, or anything dict() accepts
           <file_ot>  : output file name; must end in '.py'
                        (checked case-insensitively)
    Output: True  when the file was written,
            False when file_ot does not end in '.py' or writing failed
                  (a message is printed in the latter case).
            The file holds a single assignment: allData = <pretty-printed dict>
    Usage: dicttofilepy(project_info, "OUTDATA.py")
    Notes:
        dicttofilepy(project_info, "OUTDATA.py")
        import OUTDATA
        data = OUTDATA.allData
    Notes2: Check also in 'Python Useful Tricks'
    '''

    import pprint

    # Bug fix: the body used to read 'input' although the parameter is named
    # 'dict_in', which raised UnboundLocalError on every call.
    data = dict(dict_in)

    # Only write files that can be imported as a module afterwards.
    if not file_ot.lower().endswith('.py'):
        return False

    try:
        # 'with' closes the handle even when write() fails.
        with open(file_ot, 'w') as result_file:
            result_file.write('allData = ' + pprint.pformat(data))
    except (OSError, ValueError):
        print("Problem loading data to: " + str(file_ot))
        return False
    return True




    

def py_format(input, file_ot):
    '''
    Name: py_format
    Description: Writes a file that contains the definition of
               <input> as Python source, ready to be imported
               (see Notes and sample code).
    Input: <input>    : dictionary, or anything dict() accepts
           <file_ot>  : output file name; must end in '.py'
                        (checked case-insensitively)
    Output: True  when the file was written,
            False when file_ot does not end in '.py' or writing failed
                  (a message is printed in the latter case).
            The file holds a single assignment: allData = <pretty-printed dict>
    Usage: py_format(project_info, "OUTDATA.py")
    Notes:
        py_format(project_info, "OUTDATA.py")
        import OUTDATA
        data = OUTDATA.allData
    Notes2: Check also in 'Python Useful Tricks'
    '''

    import pprint

    # The parameter name shadows the builtin input(); it is kept because it
    # is part of the public signature.
    data = dict(input)

    # Only write files that can be imported as a module afterwards.
    if not file_ot.lower().endswith('.py'):
        return False

    try:
        # 'with' closes the handle even when write() fails; the previous
        # open()/close() pair leaked it in that case.
        with open(file_ot, 'w') as result_file:
            result_file.write('allData = ' + pprint.pformat(data))
    except (OSError, ValueError):
        print("Problem loading data to: " + str(file_ot))
        return False
    return True
    
    
    
def pretty_print(input, indent_depth):
    '''
    Name: pretty_print
    Description: Pretty-prints a data structure to stdout.
    Input: <List/Tuple/Dict> input : object to print
           indent_depth            : accepted for API compatibility; the
                                     current implementation does not use it
                                     and prints with the pprint defaults
    Output: stdout (the function itself returns None)
    Usage: pretty_print(dict_in, 4)
    Notes: Any failure while printing is swallowed and reported as
           "Pretty print failed".
    Requires: pprint module
    '''
    from pprint import PrettyPrinter

    try:
        # A default-configured printer, i.e. the same settings
        # pprint.pprint() uses when called with only the object.
        PrettyPrinter().pprint(input)
    except BaseException:
        print("Pretty print failed")

        
        
# Demo run: key every line of the input file on the 3 characters starting at
# (1-based) position 26, then print the resulting (dictionary, key count) tuple.
my_in = loadfiletodict(file_in, 26, 3)
pretty_print(my_in, 3)
# Same grouping, with a per-key line counter as first list element.
my_in2 = loadfiletodictcounter(file_in, 26, 3)
pretty_print(my_in2, 3)
# Dump only the dictionary part of the tuple as importable Python source.
py_format(my_in2[0], 'pydata.py')

# Small hand-written dictionary for a second export.
dict_new = {
    'ena' : 1,
    'dyo' : 2
    }
    
    
py_format(dict_new, 'pydata2.py')

# Read the exported data back by importing the generated module.
# (Alternative source: from pydata2 import allData)
from pydata import allData 

print(allData)

################################################################
# NAMED TUPLES

# Sometimes it is convenient to access the elements of a tuple by name rather than
# by index. This is possible with the collections.namedtuple() function. You first need
# to design the structure (type name plus field names) using namedtuple, and then you
# create instances of it. Like plain tuples, named tuples are immutable.

import collections

# Bug fix: namedtuple takes the type name followed by ONE collection of field names.
# The previous call passed three positional strings, which raises TypeError, and the
# code below reads a 'name' field that was never declared.
person = collections.namedtuple("Person", ["name", "surname", "age"])
persons = []
persons.append(person("Alain", "Delon", 32))
persons.append(person("Jean", "Gabin", 39))

# You can now access the first name of each tuple using the 'name' attribute:

first_names = [x.name for x in persons]

##############################################################
# DEQUE - DOUBLE ENDED QUEUES

Double-ended queues, or deques, can be useful when you need to remove elements in the order in which 
they were added. You can find the deque functions in the collections module.

>>> from collections import deque
>>> q = deque(range(5))
>>> q.append(5)
>>> q.appendleft(6)
>>> q 
deque([6, 0, 1, 2, 3, 4, 5])
>>> q.pop()
5
>>> q.popleft()
6
>>> q.rotate(3)
>>> q 
deque([2, 3, 4, 0, 1])

#The reason for the usefulness of the deque is that it allows appending and popping efficiently at the 
#beginning (to the left), as opposed to lists. As a nice side effect, you can also rotate the elements 
#(that is, shift them to the right or left, wrapping around the ends) efficiently. Deque objects also 
#have extend and extendleft methods, with extend working like the corresponding list method, and 
#extendleft working analogously to appendleft. Note that the elements in the iterable used in extendleft 
#will appear in the deque in reverse order.


# Various uses
>>> from collections import deque
>>> d = deque('ghi')                 # make a new deque with three items
>>> for elem in d:                   # iterate over the deque's elements
...     print elem.upper()
G
H
I

>>> d.append('j')                    # add a new entry to the right side
>>> d.appendleft('f')                # add a new entry to the left side
>>> d                                # show the representation of the deque
deque(['f', 'g', 'h', 'i', 'j'])

>>> d.pop()                          # return and remove the rightmost item
'j'
>>> d.popleft()                      # return and remove the leftmost item
'f'
>>> list(d)                          # list the contents of the deque
['g', 'h', 'i']
>>> d[0]                             # peek at leftmost item
'g'
>>> d[-1]                            # peek at rightmost item
'i'

>>> list(reversed(d))                # list the contents of a deque in reverse
['i', 'h', 'g']
>>> 'h' in d                         # search the deque
True
>>> d.extend('jkl')                  # add multiple elements at once
>>> d
deque(['g', 'h', 'i', 'j', 'k', 'l'])
>>> d.rotate(1)                      # right rotation
>>> d
deque(['l', 'g', 'h', 'i', 'j', 'k'])
>>> d.rotate(-1)                     # left rotation
>>> d
deque(['g', 'h', 'i', 'j', 'k', 'l'])

>>> deque(reversed(d))               # make a new deque in reverse order
deque(['l', 'k', 'j', 'i', 'h', 'g'])
>>> d.clear()                        # empty the deque
>>> d.pop()                          # cannot pop from an empty deque
Traceback (most recent call last):
  File "<pyshell#6>", line 1, in -toplevel-
    d.pop()
IndexError: pop from an empty deque

>>> d.extendleft('abc')              # extendleft() reverses the input order
>>> d
deque(['c', 'b', 'a'])


# Various implementations
# This section shows various approaches to working with deques.
# Bounded length deques provide functionality similar to the tail filter in Unix:

def tail(filename, n=10):
    """Return the last n lines of a file.

    filename -- path of the text file to read
    n        -- maximum number of trailing lines to keep (default 10)

    The result is a deque bounded to n items; each line keeps its line
    terminator. Requires 'deque' from the collections module.
    """
    # The bounded deque discards older lines as newer ones arrive, so only
    # n lines are held at a time. 'with' closes the file afterwards; the
    # previous version never closed it.
    with open(filename) as handle:
        return deque(handle, n)

# Another approach to using deques is to maintain a sequence of recently added elements by appending to the right and popping to the left:

def moving_average(iterable, n=3):
    """Yield the running average over a sliding window of n items.

    moving_average([40, 30, 50, 46, 39, 44]) --> 40.0 42.0 45.0 43.0
    See http://en.wikipedia.org/wiki/Moving_average

    Requires 'deque' (collections) and 'itertools'.
    """
    stream = iter(iterable)

    # Start with a placeholder 0 followed by the first n-1 items, so the
    # first real step swaps the placeholder for the n-th item.
    window = deque([0])
    window.extend(itertools.islice(stream, n - 1))
    total = sum(window)

    for value in stream:
        oldest = window.popleft()
        # Slide the window: add the newcomer, drop the oldest entry.
        total += value - oldest
        window.append(value)
        yield total / float(n)

# The rotate() method provides a way to implement deque slicing and deletion. 
# For example, a pure Python implementation of del d[n] relies on the rotate() 
# method to position elements to be popped:

def delete_nth(d, n):
    """Remove the item at position n from deque d, in place.

    Works by rotating item n to the left end, popping it, and rotating
    back. Nothing is returned.
    """
    shift = -n
    d.rotate(shift)     # item n is now the leftmost element
    d.popleft()         # drop it
    d.rotate(-shift)    # restore the original alignment

# To implement deque slicing, use a similar approach applying rotate() to bring 
# a target element to the left side of the deque. Remove old entries with 
# popleft(), add new entries with extend(), and then reverse the rotation. With 
# minor variations on that approach, it is easy to implement Forth style stack 
# manipulations such as dup, drop, swap, over, pick, rot, and roll.

###########################################################
# ORDERED DICT
# OrderedDict: dict subclass that remembers the order in which entries were
# added (plain dictionaries did not retain insertion order before Python 3.7).
# Use: We can use ordered dicts if we want dictionaries to remember order of entry.

# No order - standard use
>>> d = {"a":1,"b":2,"c":3,"d":4,"e":5}
>>> for k,v in d.items():
...     print(k,v)
...
a 1
b 2
c 3
e 5      # <-- before Python 3.7 a plain dictionary is just a mapping, so items may come out of order
d 4

# Using ordered dict -> Now let's do it using OrderedDict
>>> from collections import OrderedDict
>>> d = OrderedDict([("a",1),("b",2),("c",3),("d",4),("e",5)])
>>> for k,v in d.items():
...     print(k,v)
...
a 1
b 2
c 3
d 4
e 5

# Since an ordered dictionary remembers its insertion order, it can 
# be used in conjunction with sorting to make a sorted dictionary:

>>> # regular unsorted dictionary
>>> d = {'banana': 3, 'apple': 4, 'pear': 1, 'orange': 2}

>>> # dictionary sorted by key
>>> OrderedDict(sorted(d.items(), key=lambda t: t[0]))
OrderedDict([('apple', 4), ('banana', 3), ('orange', 2), ('pear', 1)])

>>> # dictionary sorted by value
>>> OrderedDict(sorted(d.items(), key=lambda t: t[1]))
OrderedDict([('pear', 1), ('orange', 2), ('banana', 3), ('apple', 4)])

>>> # dictionary sorted by length of the key string
>>> OrderedDict(sorted(d.items(), key=lambda t: len(t[0])))
OrderedDict([('pear', 1), ('apple', 4), ('orange', 2), ('banana', 3)])

# The new sorted dictionaries maintain their sort order when entries are 
# deleted. But when new keys are added, the keys are appended to the end 
# and the sort is not maintained.

# It is also straight-forward to create an ordered dictionary variant that 
# remembers the order the keys were last inserted. If a new entry overwrites 
# an existing entry, the original insertion position is changed and moved to the end:

class LastUpdatedOrderedDict(OrderedDict):
    """Store items in the order the keys were last added."""

    def __setitem__(self, key, value):
        # Re-assigning an existing key must move it to the end, so the
        # stale entry is dropped before the normal insertion takes place.
        if key in self:
            self.__delitem__(key)
        super().__setitem__(key, value)

# An ordered dictionary can be combined with the Counter class so that the counter 
#remembers the order elements are first encountered:

class OrderedCounter(Counter, OrderedDict):
    """Counter that remembers the order elements are first encountered."""

    def __repr__(self):
        # Show the counts through an OrderedDict so the first-seen order is
        # visible in the representation.
        ordered_view = OrderedDict(self)
        return '{}({!r})'.format(self.__class__.__name__, ordered_view)

    def __reduce__(self):
        # Pickle as "rebuild this class from an OrderedDict of the counts".
        rebuild_args = (OrderedDict(self),)
        return (self.__class__, rebuild_args)

#####################################################
# DEFAULT DICT
# defaultdict: dict subclass that calls a factory function to supply missing values
# A defaultdict with a default factory does not raise a KeyError when a missing
# key is looked up. Any key that doesn't exist gets the value returned by the
# default factory. The key takeaway: instead of throwing a KeyError, it initializes
# the missing key with the default value produced by the factory we supplied when
# we created the default dictionary.

# Sample use - Creates an empty dictionary of lists
dict_out = collections.defaultdict(list)
 
# Standard use and problem with keyerror
>>> from collections import defaultdict
>>> mydict = {}
>>> mydict[1]
Traceback (most recent call last):
 File "<stdin>", line 1, in <module>
KeyError: 1

# Same with defaultdict
>>> mydict = defaultdict(object)
>>> mydict[1]
<object object at 0x101217110>


# Using list as the default_factory, it is easy to group a sequence of 
#key-value pairs into a dictionary of lists:
>>> s = [('yellow', 1), ('blue', 2), ('yellow', 3), ('blue', 4), ('red', 1)]
>>> d = defaultdict(list)
>>> for k, v in s:
...     d[k].append(v)
...
>>> d.items()
[('blue', [2, 4]), ('red', [1]), ('yellow', [1, 3])]


# When each key is encountered for the first time, it is not already in 
# the mapping; so an entry is automatically created using the default_factory 
# function which returns an empty list. The list.append() operation then 
# attaches the value to the new list. When keys are encountered again, the 
# look-up proceeds normally (returning the list for that key) and the 
# list.append() operation adds another value to the list. This technique is 
# simpler and faster than an equivalent technique using dict.setdefault():

>>> d = {}
>>> for k, v in s:
...     d.setdefault(k, []).append(v)
...
>>> d.items()
[('blue', [2, 4]), ('red', [1]), ('yellow', [1, 3])]

# Setting the default_factory to int makes the defaultdict useful for counting 
#(like a bag or multiset in other languages):

>>> s = 'mississippi'
>>> d = defaultdict(int)
>>> for k in s:
...     d[k] += 1
...
>>> d.items()
[('i', 4), ('p', 2), ('s', 4), ('m', 1)]

# When a letter is first encountered, it is missing from the mapping, so the 
# default_factory function calls int() to supply a default count of zero. The 
# increment operation then builds up the count for each letter.

# The function int() which always returns zero is just a special case of 
# constant functions. A faster and more flexible way to create constant 
# functions is to use itertools.repeat() which can supply any constant value 
# (not just zero):

>>> def constant_factory(value):
...     return itertools.repeat(value).next
>>> d = defaultdict(constant_factory('<missing>'))
>>> d.update(name='John', action='ran')
>>> '%(name)s %(action)s to %(object)s' % d
'John ran to <missing>'

# Setting the default_factory to set makes the defaultdict useful for 
# building a dictionary of sets:

>>> s = [('red', 1), ('blue', 2), ('red', 3), ('blue', 4), ('red', 1), ('blue', 4)]
>>> d = defaultdict(set)
>>> for k, v in s:
...     d[k].add(v)
...
>>> d.items()
[('blue', set([2, 4])), ('red', set([1, 3]))]

###################################################
# COUNTER
# Counter is a dict subclass for counting hashable objects.
# We use it to quickly create counters in dictionaries.

##
# Necessary module
from collections import Counter

##
# Uses : quickly count frequencies in list
mylist = [1,2,1,3,1,4]
print(Counter(mylist))
>> Counter({1:3, 2:1, 3:1, 4:1})

##
# Uses: quickly count frequencies in a string
>>> mystr = "mynasdbdadddndashhhqas"
>>> Counter(mystr)
Counter({'d': 6, 'a': 4, 's': 3, 'h': 3, 'n': 2, 'm': 1, 'y': 1, 'b': 1, 'q': 1})

##
# Uses: count word frequencies
>>> mysent = "Welcome to the world of Python Python is an awesome langauge"
>>> mysent = mysent.split()
>>> Counter(mysent)
Counter({'Python': 2, 'Welcome': 1, 'to': 1, 'the': 1, 'world': 1, 'of': 1, 'is': 1, 'an': 1, 'awesome': 1, 'langauge': 1})

##
# Show X most common words
>>> mysent = "Welcome to the world of Python Python is an awesome langauge to learn"
>>> mysent = mysent.split()
>>> x = Counter(mysent)
>>> x.most_common(2)
[('to', 2), ('Python', 2)]


##
# Total number of words counted (sum of all the individual counts)
>>> sum(x.values())
13

##
# Tally occurrences of words in a list
>>> cnt = Counter()
>>> for word in ['red', 'blue', 'red', 'green', 'blue', 'blue']:
...     cnt[word] += 1
>>> cnt
Counter({'blue': 3, 'red': 2, 'green': 1})

##
# Find the ten most common words in Hamlet
>>> import re
>>> words = re.findall(r'\w+', open('hamlet.txt').read().lower())
>>> Counter(words).most_common(10)
[('the', 1143), ('and', 966), ('to', 762), ('of', 669), ('i', 631),
 ('you', 554),  ('a', 546), ('my', 514), ('hamlet', 471), ('in', 451)]


## 
# Counter objects have a dictionary interface except that they return 
#a zero count for missing items instead of raising a KeyError:

>>> c = Counter(['eggs', 'ham'])
>>> c['bacon']                              # count of a missing element is zero
0
 

##
# Using "Elements" -> elements()
# Return an iterator over elements repeating each as many times as its count. 
# Elements are returned in arbitrary order. If an element’s count is less than 
#one, elements() will ignore it.

>>> c = Counter(a=4, b=2, c=0, d=-2)
>>> list(c.elements())
['a', 'a', 'a', 'a', 'b', 'b']


# -*- coding: utf-8 -*-
"""
Created on Thu Mar 30 16:56:00 2017

@author: P.Doulgeridis
"""


import os
import sys
import time


# The input path is the first command-line argument; the three output paths
# are that same path with ".proc", ".proc2" and ".proc3" appended.
file_in = sys.argv[1]
file_ot, file_ot2, file_ot3 = (
    "".join((str(file_in), ext)) for ext in (".proc", ".proc2", ".proc3")
)






def filetoliststrip(file):
    '''
    Function: filetoliststrip
    Description: Reads a text file and returns its lines with leading and
                 trailing whitespace (including the newline) removed
    Input: File - path of the file to read (converted with str())
    Output: List of stripped lines, in file order; blank lines become ''
    Usage: print (filetoliststrip("C:/Users/me/Desktop/testpy.txt"))
    Notes: Windows paths need doubled backslashes or forward slashes.
           Raises whatever open() raises when the file cannot be read.
    '''
    # The with-block closes the handle as soon as the lines are read, even
    # if reading fails part-way; the previous version never closed it.
    with open(str(file), 'r') as handle:
        return [row.strip() for row in handle]
    
    
# Group every input record by its key field (characters 10-68 of the line).
# Each dict_in entry is a list: [occurrence_count, first_line, second_line, ...].
dict_in = dict()
seen = []  # not referenced by the code visible in this script

fileinlist = filetoliststrip(file_in)

# The with-statement guarantees all three output files are closed even when
# grouping or writing raises part-way through.
with open(file_ot, 'w') as out_file, \
        open(file_ot2, 'w') as out_file2, \
        open(file_ot3, 'w') as out_file3:

    for line in fileinlist:
        keyf = line[10:69]
        # setdefault seeds an unseen key with a zero counter, so first and
        # repeated occurrences share one code path.
        entry = dict_in.setdefault(keyf, [0])
        entry[0] += 1
        entry.append(line)

    # Write one record per key, chosen by how often the key occurred:
    #   once          -> first record  -> file_ot
    #   exactly twice -> second record -> file_ot2
    #   three or more -> third record  -> file_ot3
    # NOTE(review): the records were stripped when read and are written
    # without a trailing newline, so each output file ends up as a single
    # concatenated line - confirm that this is intended.
    for j in dict_in:
        count = dict_in[j][0]
        if count < 2:
            out_file.write(dict_in[j][1])
        elif count == 2:
            out_file2.write(dict_in[j][2])
        elif count > 2:
            out_file3.write(dict_in[j][3])

# source: webpy
def dictreverse(mapping):
    """
    Function: dictreverse
    Description: Builds the inverse of a dictionary (values become keys)
    Input: dictionary whose values are hashable
    Output: new dict mapping each original value to its original key
    Usage : a = dictreverse(c)
    Notes : The input is not modified. When several keys share one value,
            the key that is iterated last is the one kept.
        >>> dictreverse({1: 2, 3: 4})
        {2: 1, 4: 3}
    """
    swapped = {}
    for key, value in mapping.items():
        swapped[value] = key
    return swapped

c = {'a': 1, 'b': 2, 'c': 3}

# print is called as a function so this line parses on Python 3 as well as
# Python 2 (a single parenthesised argument prints the same on both).
print(dictreverse(c))

#The del statement removes an element:

del d[key]
#However, this mutates the existing dictionary, so its contents change for anyone else who holds a reference to the same instance. To return a new dictionary instead, make a copy of the dictionary first:

def removekey(d, key):
    """
    Return a shallow copy of d without the entry for key.

    The dictionary passed in is left untouched. A KeyError is raised when
    key is not present, because pop() is called without a default.
    """
    trimmed = dict(d)
    trimmed.pop(key)
    return trimmed

#The dict() constructor makes a shallow copy. To make a deep copy, see the copy module.

def pretty_print(b):
    '''
    Function: pretty_print
    Description : Pretty prints a dictionary, one "key : value" pair per
                  tab-indented line between a "{ " line and a "}" line
    Input : Dictionary
    Output: STDOUT (a blank line follows the closing brace)
    Usage(print) : pretty_print(b)
    Notes : Only prints on screen; the function returns None, so assigning
            its result is not useful.
    '''
    # The docstring is indented like the body; with the docstring at a
    # shallower indent the function fails to compile (IndentationError).
    print ("{ ")
    for a in b:
        print ( "\t" + str(a) + " : " + str(b[a]) )
    print ("}\n")

#Dictionaries are a convenient way to store data for later retrieval by name (key). Keys must be unique, 
#immutable objects, and are typically strings. The values in a dictionary can be anything. For many 
#applications the values are simple types such as integers and strings. It gets more interesting when 
#the values in a dictionary are collections (lists, dicts, etc.) In this case, the value (an empty list or dict) 
#must be initialized the first time a given key is used. While this is relatively easy to do manually, 
#the defaultdict type automates and simplifies these kinds of operations.
#A defaultdict works exactly like a normal dict, but it is initialized with a function (“default factory”) 
#that takes no arguments and provides the default value for a nonexistent key.
#A defaultdict will never raise a KeyError. Any key that does not exist gets the value returned by the default factory.

>>> from collections import defaultdict
>>> ice_cream = defaultdict(lambda: 'Vanilla')
>>>
>>> ice_cream = defaultdict(lambda: 'Vanilla')
>>> ice_cream['Sarah'] = 'Chunky Monkey'
>>> ice_cream['Abdul'] = 'Butter Pecan'
>>> print ice_cream['Sarah']
Chunky Monkey
>>> print ice_cream['Joe']
Vanilla
>>>


#Be sure to pass the function object to defaultdict(). Do not call the function, i.e. defaultdict(func), not defaultdict(func()).
#In the following example, a defaultdict is used for counting. The default factory is int, which in turn has a default value of zero. 
#(Note: "lambda: 0" would also work in this situation). For each food in the list, the value is incremented by one where the key is the food. 
#We do not need to make sure the food is already a key – it will use the default value of zero.

>>> from collections import defaultdict
>>> food_list = 'spam spam spam spam spam spam eggs spam'.split()
>>> food_count = defaultdict(int) # default value of int is 0
>>> for food in food_list:
...     food_count[food] += 1 # increment element's value by 1
...
>>> food_count
defaultdict(<type 'int'>, {'eggs': 1, 'spam': 7})
>>>


#In the next example, we start with a list of states and cities. We want to build a dictionary 
#where the keys are the state abbreviations and the values are lists of all cities for that state. 
#To build this dictionary of lists, we use a defaultdict with a default factory of list. A new list 
#is created for each new key.

>>> from collections import defaultdict
>>> city_list = [('TX','Austin'), ('TX','Houston'), ('NY','Albany'), ('NY', 'Syracuse'), ('NY', 'Buffalo'), ('NY', 'Rochester'), ('TX', 'Dallas'), ('CA','Sacramento'), ('CA', 'Palo Alto'), ('GA', 'Atlanta')]
>>>
>>> cities_by_state = defaultdict(list)
>>> for state, city in city_list:
...     cities_by_state[state].append(city)
...
>>> for state, cities in cities_by_state.iteritems():
...     print state, ', '.join(cities)
...
#NY Albany, Syracuse, Buffalo, Rochester
#CA Sacramento, Palo Alto
#GA Atlanta
#TX Austin, Houston, Dallas
#In conclusion, whenever you need a dictionary, and each element’s value should start with a default value, use a defaultdict.

def keywithmaxval(d):
    """Return the key of d whose value is the largest.

    When several keys share the largest value, the first one in the
    dictionary's iteration order is returned. An empty dictionary raises
    ValueError (from max()).
    """
    return max(d, key=d.get)

def filter_dict_keys(mydict,mylist):
    '''
    Function: filter_dict_keys
    Description: Filters out keys included in exclusion list
    Input: Dictionary, List of keys to be excluded
    Output: New dict, without the excluded keys (the input is not modified)
    Usage(print): print (filter_dict_keys(mydict,mylist))
    Usage(assign): b = filter_dict_keys(mydict,mylist)
    '''
    # The docstring is indented like the body; with the docstring at a
    # shallower indent the function fails to compile (IndentationError).
    return {key: mydict[key] for key in mydict if key not in mylist}
    
a = {1: 2, 3: 4, 5: 6}
b = [1, 3]

# Keys 1 and 3 are excluded, which leaves {5: 6}. The call uses
# filter_dict_keys because no function named filter_dict is defined.
print (filter_dict_keys(a,b))

###############################################
# EXPANDED VERSION WITH MODES

def filter_dict_keys(mydict,mylist, mode = 1):
    '''
    Function: filter_dict_keys
    Description: Builds a new dict from mydict, either dropping or keeping
                 the keys named in mylist
    Input: Dictionary, List of keys, mode
    Output: New dict (the input dictionary is not modified)
    Usage(print): print (filter_dict_keys(mydict,mylist))
    Usage(assign): b = filter_dict_keys(mydict,mylist)
    Modes : 1 (Exclude listed keys - Default) / any other value (keep only
            the listed keys)
    '''
    if mode == 1:
        return {key: mydict[key] for key in mydict if key not in mylist}
    return {key: mydict[key] for key in mydict if key in mylist}
        
        
# Demo of include mode: with mode 0 only the listed keys (1 and 3) are kept,
# so this prints {1: 2, 3: 4}.
a = {1: 2, 3: 4, 5: 6}
b = [1, 3]
print(filter_dict_keys(a, b, 0))

def filter_array_dict(dict, store):
    """
    Function: filter_array_dict
    Description: Keeps the entries whose VALUE is not contained in store
    Input: dict, store (container of values to reject)
    Output: New dict (the input dictionary is not modified)
    Usage: a = filter_array_dict(dict, store)
    """
    # Change "not in" to "in" for the reverse effect
    return {key: value for key, value in dict.items() if value not in store}
    
    
dicta = {1: 2, 3: 4, 5: 6}
lista = [2, 4]

# Values 2 and 4 are listed in lista, so only the 5 -> 6 entry is printed.
# print is called as a function so the line parses on Python 3 as well.
print(filter_array_dict(dicta, lista))

# Sample data for find_index: the list to search and the values to look up.
haystack = [
    "Zig", "Zag", "Wally", "Ronald", "Bush",
    "Bush", "Krusty", "Charlie", "Bush", "Bozo",
]
b = ["Zig", "Bush"]
 
def find_index(lista, listb):
    """
    Function: find_index
    Description: Looks up each needle of listb in lista
    Input: lista (list to search), listb (values to look for)
    Output: dict mapping each needle to a list of positions
    Notes: list.index() reports only the first match, so each lookup adds
           the first position only; a needle repeated in listb gets that
           same position appended again. A needle missing from lista is
           reported on STDOUT and keeps an empty list in the result.
           NOTE(review): the list-valued result suggests that all
           positions may have been intended - confirm before relying on it.
    """
    positions = dict()

    for needle in listb:
        # The entry is created before the lookup, so it exists even when
        # the lookup below fails.
        hits = positions.setdefault(needle, [])
        try:
            hits.append(lista.index(needle))
        except ValueError:
            print (needle,"is not in haystack")
    return positions
            
# Demo: look up the sample needles in the sample haystack and show the result.
print(find_index(haystack, b))

# Sample input for dictfreq: 'a' and 'd' share the value 3.
b = {'a': 3, 'b': 9, 'c': 88, 'd': 3}


def dictfreq(b):
    '''
    Function: dictfreq
    Description: Groups the keys of a dictionary by the value they map to
    Input: dict (its values must be hashable)
    Output: dictionary with the previous values as keys, each mapped to a
            list of the previous keys that had that value
    Usage: a = dictfreq(dict)
    Notes: Does not edit in place
    '''
    groups = dict()
    for key, value in b.items():
        # setdefault creates the list the first time a value is seen.
        groups.setdefault(value, []).append(key)
    return groups
    
    
# Demo: show the sample dictionary's keys grouped by their shared values.
print(dictfreq(b))

def count_smaller_dict(dict, limit):
    '''
    Function: count_smaller_dict
    Description: Counts the keys whose value is strictly smaller than limit
    Input: dict, limit
    Output: integer (0 when nothing qualifies or the dict is empty)
    Usage: a = count_smaller_dict(dict,limit)
    Notes: Does not edit in place
    '''
    return sum(1 for value in dict.values() if value < limit)

def count_larger_dict(dict, limit):
    '''
    Function: count_larger_dict
    Description: Counts keys with values strictly larger than limit
    Input: dict, limit
    Output: integer (0 when nothing qualifies or the dict is empty)
    Usage: a = count_larger_dict(dict,limit)
    Notes: Does not edit in place
    '''
    # The docstring is indented like the body; with the docstring at a
    # shallower indent the function fails to compile (IndentationError).
    return sum(1 for value in dict.values() if value > limit)

def filter_smaller_dict(dict, limit):
    '''
    Function: filter_smaller_dict
    Description: Keeps the entries whose value is strictly smaller than limit
    Input: dict, limit
    Output: New dict (the input dictionary is not modified)
    Usage: a = filter_smaller_dict(dict,limit)
    '''
    return {key: value for key, value in dict.items() if value < limit}

    
def filter_larger_dict(dict, limit):
    '''
    Function: filter_larger_dict
    Description: Keeps the entries whose value is strictly larger than limit
    Input: dict, limit
    Output: New dict (the input dictionary is not modified)
    Usage: a = filter_larger_dict(dict,limit)
    '''
    return {key: value for key, value in dict.items() if value > limit}

以上是关于python Python.DataTypes.Dictionaries的主要内容，如果未能解决你的问题，请参考以下文章