'''
Playing with MapReduce in Python.
Reference:
http://mikecvet.wordpress.com/2010/07/02/parallel-mapreduce-in-python/
'''
import operator
from functools import reduce
from multiprocessing import Pool
def generate_data(A=90000, B=20):
    """Build a synthetic nested-list data set.

    Returns a list of ``A`` rows. Every row is a freshly built list of ``B``
    single-element lists: ``[0], [1], ..., [B - 1]``.
    """
    rows = []
    for _ in range(A):
        row = []
        for value in range(B):
            row.append([value])
        rows.append(row)
    return rows
def M(I):
    """Map step: compute the length of every item in one chunk.

    Args:
        I: an iterable of sized objects (anything ``len()`` accepts).

    Returns:
        A list holding ``len(item)`` for each item of ``I``, in order.
    """
    # Build a concrete list instead of calling ``map`` with a lambda: on
    # Python 3 ``map`` returns a lazy iterator rather than a list, and an
    # iterator wrapping a lambda cannot be pickled for transfer between
    # processes.
    return [len(item) for item in I]
def R(I):
    """Reduce step: combine all items of ``I`` with ``+``.

    Args:
        I: an iterable of values that support ``+`` with each other.

    Returns:
        The left-to-right sum of the items, or ``0`` when ``I`` is empty.
    """
    values = iter(I)
    try:
        # Seed the fold with the first item (not a literal 0) so that
        # non-numeric addable values such as strings or lists still work.
        total = next(values)
    except StopIteration:
        # An empty input contributes nothing to the overall sum.
        return 0
    # ``functools.reduce`` is available on both Python 2 and Python 3,
    # whereas the builtin ``reduce`` only exists on Python 2.
    return reduce(operator.add, values, total)
def chunks(l, n):
    """Yield successive slices of ``l`` that are at most ``n`` items long.

    Args:
        l: a sliceable sequence supporting ``len()``.
        n: chunk size; must be an integer of at least 1.

    Yields:
        ``l[0:n]``, ``l[n:2 * n]``, ... The last slice may be shorter than
        ``n``. Nothing is yielded when ``l`` is empty.

    Raises:
        ValueError: if ``n`` is smaller than 1. Because this is a generator,
            the error surfaces when iteration starts.
    """
    if n < 1:
        raise ValueError("chunk size n must be at least 1, got %r" % (n,))
    # ``range`` exists on both Python 2 and 3; ``xrange`` is Python 2 only.
    for start in range(0, len(l), n):
        yield l[start:start + n]
if __name__ == '__main__':
    # Demo driver: count the cells of the generated data set with a
    # two-stage map/reduce spread over a pool of worker processes.
    P = 8  # number of worker processes, and target number of chunks
    pool = Pool(P)
    try:
        I = generate_data()
        # Ceiling integer division: at most P chunks are produced, and the
        # chunk size never drops to 0 when there are fewer rows than
        # workers. ``//`` keeps the size an int on both Python 2 and 3.
        chunk_size = max(1, -(-len(I) // P))
        data = list(chunks(I, chunk_size))
        # Map: each worker turns one chunk of rows into a list of row lengths.
        inter = pool.map(M, data)
        # Reduce: each worker sums one list of row lengths.
        res = pool.map(R, inter)
    finally:
        # Always shut the workers down, even if a stage raised.
        pool.close()
        pool.join()
    # Final reduce over the per-chunk partial sums, done in the parent.
    print(R(res))